diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-19 11:18:48 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-19 11:28:22 +0300 |
commit | 6258e6efc801f6a94c329d149763c5650c507ec1 (patch) | |
tree | 4f94c6d4667bc32fbcf5cb9c5944c9a57da32c53 | |
parent | 1e95bf12278f195bc3450875f66f7304b8b269b9 (diff) | |
download | ydb-6258e6efc801f6a94c329d149763c5650c507ec1.tar.gz |
Intermediate changes
commit_hash:1338f91346a59e6bbff8c8f2444e990a51dc2e63
11 files changed, 71 insertions, 58 deletions
diff --git a/contrib/libs/croaring/.yandex_meta/override.nix b/contrib/libs/croaring/.yandex_meta/override.nix index fa1cd2b260..7c23a64e1a 100644 --- a/contrib/libs/croaring/.yandex_meta/override.nix +++ b/contrib/libs/croaring/.yandex_meta/override.nix @@ -1,12 +1,12 @@ pkgs: attrs: with pkgs; with attrs; rec { pname = "croaring"; - version = "4.2.0"; + version = "4.2.1"; src = fetchFromGitHub { owner = "RoaringBitmap"; repo = "CRoaring"; rev = "v${version}"; - hash = "sha256-PzwtQDAsnRGIjeb3Ax6qqXtdEqtwaCWsj6g46J3Oqm0="; + hash = "sha256-qOFkDu0JM+wBIlGGyewojicCp2pmtr643J3dW6el+O4="; }; patches = []; diff --git a/contrib/libs/croaring/README.md b/contrib/libs/croaring/README.md index eb5ee92752..0f938004c9 100644 --- a/contrib/libs/croaring/README.md +++ b/contrib/libs/croaring/README.md @@ -530,26 +530,26 @@ bitset_free(b); // frees memory More advanced example: ```C - bitset_t *b = bitset_create(); - for (int k = 0; k < 1000; ++k) { - bitset_set(b, 3 * k); - } - // We have bitset_count(b) == 1000. - // We have bitset_get(b, 3) is true - // You can iterate through the values: - size_t k = 0; - for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { - // You will have i == k - k += 3; - } - // We support a wide range of operations on two bitsets such as - // bitset_inplace_symmetric_difference(b1,b2); - // bitset_inplace_symmetric_difference(b1,b2); - // bitset_inplace_difference(b1,b2);// should make no difference - // bitset_inplace_union(b1,b2); - // bitset_inplace_intersection(b1,b2); - // bitsets_disjoint - // bitsets_intersect +bitset_t *b = bitset_create(); +for (int k = 0; k < 1000; ++k) { + bitset_set(b, 3 * k); +} +// We have bitset_count(b) == 1000. +// We have bitset_get(b, 3) is true +// You can iterate through the values: +size_t k = 0; +for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { + // You will have i == k + k += 3; +} +// We support a wide range of operations on two bitsets such as +// bitset_inplace_symmetric_difference(b1,b2); +// bitset_inplace_symmetric_difference(b1,b2); +// bitset_inplace_difference(b1,b2);// should make no difference +// bitset_inplace_union(b1,b2); +// bitset_inplace_intersection(b1,b2); +// bitsets_disjoint +// bitsets_intersect ``` In some instances, you may want to convert a Roaring bitmap into a conventional (uncompressed) bitset. @@ -557,28 +557,28 @@ Indeed, bitsets have advantages such as higher query performances in some cases. illustrates how you may do so: ```C - roaring_bitmap_t *r1 = roaring_bitmap_create(); - for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { +roaring_bitmap_t *r1 = roaring_bitmap_create(); +for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { roaring_bitmap_add(r1, i); - } - for (uint32_t i = 100000; i < 500000; i+= 100) { +} +for (uint32_t i = 100000; i < 500000; i+= 100) { roaring_bitmap_add(r1, i); - } - roaring_bitmap_add_range(r1, 500000, 600000); - bitset_t * bitset = bitset_create(); - bool success = roaring_bitmap_to_bitset(r1, bitset); - assert(success); // could fail due to memory allocation. - assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1)); - // You can then query the bitset: - for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { - assert(bitset_get(bitset,i)); - } - for (uint32_t i = 100000; i < 500000; i+= 100) { - assert(bitset_get(bitset,i)); - } - // you must free the memory: - bitset_free(bitset); - roaring_bitmap_free(r1); +} +roaring_bitmap_add_range(r1, 500000, 600000); +bitset_t * bitset = bitset_create(); +bool success = roaring_bitmap_to_bitset(r1, bitset); +assert(success); // could fail due to memory allocation. +assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1)); +// You can then query the bitset: +for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { + assert(bitset_get(bitset,i)); +} +for (uint32_t i = 100000; i < 500000; i+= 100) { + assert(bitset_get(bitset,i)); +} +// you must free the memory: +bitset_free(bitset); +roaring_bitmap_free(r1); ``` You should be aware that a convention bitset (`bitset_t *`) may use much more diff --git a/contrib/libs/croaring/include/roaring/roaring_version.h b/contrib/libs/croaring/include/roaring/roaring_version.h index 33926a2102..aad63adecb 100644 --- a/contrib/libs/croaring/include/roaring/roaring_version.h +++ b/contrib/libs/croaring/include/roaring/roaring_version.h @@ -2,11 +2,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.2.0" +#define ROARING_VERSION "4.2.1" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_REVISION = 1 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on
\ No newline at end of file diff --git a/contrib/libs/croaring/src/containers/bitset.c b/contrib/libs/croaring/src/containers/bitset.c index 7b84af82ec..7a38d072b3 100644 --- a/contrib/libs/croaring/src/containers/bitset.c +++ b/contrib/libs/croaring/src/containers/bitset.c @@ -904,7 +904,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ } \ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ const bitset_container_t *src_2) { \ - printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_1 = src_1->words; \ const uint64_t * __restrict__ words_2 = src_2->words; \ int32_t sum = 0; \ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ diff --git a/contrib/libs/croaring/ya.make b/contrib/libs/croaring/ya.make index 78b8b40c9d..63e87a8b2a 100644 --- a/contrib/libs/croaring/ya.make +++ b/contrib/libs/croaring/ya.make @@ -10,9 +10,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(4.2.0) +VERSION(4.2.1) -ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.2.0.tar.gz) +ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.2.1.tar.gz) ADDINCL( GLOBAL contrib/libs/croaring/include diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA index 24f7a78836..bb928a4bc2 100644 --- a/contrib/python/clickhouse-connect/.dist-info/METADATA +++ b/contrib/python/clickhouse-connect/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: clickhouse-connect -Version: 0.8.1 +Version: 0.8.2 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset Home-page: https://github.com/ClickHouse/clickhouse-connect Author: ClickHouse Inc. diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py index 398cfc4c74..c400d68132 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py @@ -1 +1 @@ -version = '0.8.1' +version = '0.8.2' diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py index fe11c27883..d6b84885e4 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py @@ -59,9 +59,15 @@ class Client(ABC): """ self.query_limit = coerce_int(query_limit) self.query_retries = coerce_int(query_retries) + if database and not database == '__default__': + self.database = database if show_clickhouse_errors is not None: self.show_clickhouse_errors = coerce_bool(show_clickhouse_errors) self.server_host_name = server_host_name + self.uri = uri + self._init_common_settings(apply_server_timezone) + + def _init_common_settings(self, apply_server_timezone:Optional[Union[str, bool]] ): self.server_tz, dst_safe = pytz.UTC, True self.server_version, server_tz = \ tuple(self.command('SELECT version(), timezone()', use_database=False)) @@ -83,8 +89,7 @@ class Client(ABC): readonly = common.get_setting('readonly') server_settings = self.query(f'SELECT name, value, {readonly} as readonly FROM system.settings LIMIT 10000') self.server_settings = {row['name']: SettingDef(**row) for row in server_settings.named_results()} - if database and not database == '__default__': - self.database = database + if self.min_version(CH_VERSION_WITH_PROTOCOL): # Unfortunately we have to validate that the client protocol version is actually used by ClickHouse # since the query parameter could be stripped off (in particular, by CHProxy) @@ -95,7 +100,9 @@ class Client(ABC): self.protocol_version = PROTOCOL_VERSION_WITH_LOW_CARD if self._setting_status('date_time_input_format').is_writable: self.set_client_setting('date_time_input_format', 'best_effort') - self.uri = uri + if self._setting_status('allow_experimental_json_type').is_set: + self.set_client_setting('cast_string_to_dynamic_use_inference', '1') + def _validate_settings(self, settings: Optional[Dict[str, Any]]) -> Dict[str, str]: """ @@ -655,7 +662,8 @@ class Client(ABC): settings=settings, context=context) def insert_arrow(self, table: str, - arrow_table, database: str = None, + arrow_table, + database: str = None, settings: Optional[Dict] = None) -> QuerySummary: """ Insert a PyArrow table DataFrame into ClickHouse using raw Arrow format @@ -666,7 +674,8 @@ class Client(ABC): :return: QuerySummary with summary information, throws exception if insert fails """ full_table = table if '.' in table or not database else f'{database}.{table}' - column_names, insert_block = arrow_buffer(arrow_table) + compression = self.write_compression if self.write_compression in ('zstd', 'lz4') else None + column_names, insert_block = arrow_buffer(arrow_table, compression) return self.raw_insert(full_table, column_names, insert_block, settings, 'Arrow') def create_insert_context(self, diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py index 58b5460a59..558d66f614 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py @@ -244,7 +244,8 @@ class ResponseSource: else: chunk = chunks.popleft() current_size -= len(chunk) - yield chunk + if chunk: + yield chunk self.gen = buffered() diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py index 54edbeff09..bd10270e71 100644 --- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py +++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py @@ -374,9 +374,12 @@ def to_arrow_batches(buffer: IOBase) -> StreamContext: return StreamContext(buffer, reader) -def arrow_buffer(table) -> Tuple[Sequence[str], bytes]: +def arrow_buffer(table, compression: Optional[str] = None) -> Tuple[Sequence[str], bytes]: pyarrow = check_arrow() + options = None + if compression in ('zstd', 'lz4'): + options = pyarrow.ipc.IpcWriteOptions(compression=pyarrow.Codec(compression=compression)) sink = pyarrow.BufferOutputStream() - with pyarrow.RecordBatchFileWriter(sink, table.schema) as writer: + with pyarrow.RecordBatchFileWriter(sink, table.schema, options=options) as writer: writer.write(table) return table.schema.names, sink.getvalue() diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make index e594301105..89d942df53 100644 --- a/contrib/python/clickhouse-connect/ya.make +++ b/contrib/python/clickhouse-connect/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.8.1) +VERSION(0.8.2) LICENSE(Apache-2.0) |