aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-10-19 11:18:48 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-10-19 11:28:22 +0300
commit6258e6efc801f6a94c329d149763c5650c507ec1 (patch)
tree4f94c6d4667bc32fbcf5cb9c5944c9a57da32c53
parent1e95bf12278f195bc3450875f66f7304b8b269b9 (diff)
downloadydb-6258e6efc801f6a94c329d149763c5650c507ec1.tar.gz
Intermediate changes
commit_hash:1338f91346a59e6bbff8c8f2444e990a51dc2e63
-rw-r--r--contrib/libs/croaring/.yandex_meta/override.nix4
-rw-r--r--contrib/libs/croaring/README.md80
-rw-r--r--contrib/libs/croaring/include/roaring/roaring_version.h4
-rw-r--r--contrib/libs/croaring/src/containers/bitset.c2
-rw-r--r--contrib/libs/croaring/ya.make4
-rw-r--r--contrib/python/clickhouse-connect/.dist-info/METADATA2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/__version__.py2
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py19
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py3
-rw-r--r--contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py7
-rw-r--r--contrib/python/clickhouse-connect/ya.make2
11 files changed, 71 insertions, 58 deletions
diff --git a/contrib/libs/croaring/.yandex_meta/override.nix b/contrib/libs/croaring/.yandex_meta/override.nix
index fa1cd2b260..7c23a64e1a 100644
--- a/contrib/libs/croaring/.yandex_meta/override.nix
+++ b/contrib/libs/croaring/.yandex_meta/override.nix
@@ -1,12 +1,12 @@
pkgs: attrs: with pkgs; with attrs; rec {
pname = "croaring";
- version = "4.2.0";
+ version = "4.2.1";
src = fetchFromGitHub {
owner = "RoaringBitmap";
repo = "CRoaring";
rev = "v${version}";
- hash = "sha256-PzwtQDAsnRGIjeb3Ax6qqXtdEqtwaCWsj6g46J3Oqm0=";
+ hash = "sha256-qOFkDu0JM+wBIlGGyewojicCp2pmtr643J3dW6el+O4=";
};
patches = [];
diff --git a/contrib/libs/croaring/README.md b/contrib/libs/croaring/README.md
index eb5ee92752..0f938004c9 100644
--- a/contrib/libs/croaring/README.md
+++ b/contrib/libs/croaring/README.md
@@ -530,26 +530,26 @@ bitset_free(b); // frees memory
More advanced example:
```C
- bitset_t *b = bitset_create();
- for (int k = 0; k < 1000; ++k) {
- bitset_set(b, 3 * k);
- }
- // We have bitset_count(b) == 1000.
- // We have bitset_get(b, 3) is true
- // You can iterate through the values:
- size_t k = 0;
- for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
- // You will have i == k
- k += 3;
- }
- // We support a wide range of operations on two bitsets such as
- // bitset_inplace_symmetric_difference(b1,b2);
- // bitset_inplace_symmetric_difference(b1,b2);
- // bitset_inplace_difference(b1,b2);// should make no difference
- // bitset_inplace_union(b1,b2);
- // bitset_inplace_intersection(b1,b2);
- // bitsets_disjoint
- // bitsets_intersect
+bitset_t *b = bitset_create();
+for (int k = 0; k < 1000; ++k) {
+ bitset_set(b, 3 * k);
+}
+// We have bitset_count(b) == 1000.
+// We have bitset_get(b, 3) is true
+// You can iterate through the values:
+size_t k = 0;
+for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
+ // You will have i == k
+ k += 3;
+}
+// We support a wide range of operations on two bitsets such as
+// bitset_inplace_symmetric_difference(b1,b2);
+// bitset_inplace_symmetric_difference(b1,b2);
+// bitset_inplace_difference(b1,b2);// should make no difference
+// bitset_inplace_union(b1,b2);
+// bitset_inplace_intersection(b1,b2);
+// bitsets_disjoint
+// bitsets_intersect
```
In some instances, you may want to convert a Roaring bitmap into a conventional (uncompressed) bitset.
@@ -557,28 +557,28 @@ Indeed, bitsets have advantages such as higher query performances in some cases.
illustrates how you may do so:
```C
- roaring_bitmap_t *r1 = roaring_bitmap_create();
- for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+roaring_bitmap_t *r1 = roaring_bitmap_create();
+for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
roaring_bitmap_add(r1, i);
- }
- for (uint32_t i = 100000; i < 500000; i+= 100) {
+}
+for (uint32_t i = 100000; i < 500000; i+= 100) {
roaring_bitmap_add(r1, i);
- }
- roaring_bitmap_add_range(r1, 500000, 600000);
- bitset_t * bitset = bitset_create();
- bool success = roaring_bitmap_to_bitset(r1, bitset);
- assert(success); // could fail due to memory allocation.
- assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
- // You can then query the bitset:
- for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
- assert(bitset_get(bitset,i));
- }
- for (uint32_t i = 100000; i < 500000; i+= 100) {
- assert(bitset_get(bitset,i));
- }
- // you must free the memory:
- bitset_free(bitset);
- roaring_bitmap_free(r1);
+}
+roaring_bitmap_add_range(r1, 500000, 600000);
+bitset_t * bitset = bitset_create();
+bool success = roaring_bitmap_to_bitset(r1, bitset);
+assert(success); // could fail due to memory allocation.
+assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
+// You can then query the bitset:
+for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+ assert(bitset_get(bitset,i));
+}
+for (uint32_t i = 100000; i < 500000; i+= 100) {
+ assert(bitset_get(bitset,i));
+}
+// you must free the memory:
+bitset_free(bitset);
+roaring_bitmap_free(r1);
```
You should be aware that a convention bitset (`bitset_t *`) may use much more
diff --git a/contrib/libs/croaring/include/roaring/roaring_version.h b/contrib/libs/croaring/include/roaring/roaring_version.h
index 33926a2102..aad63adecb 100644
--- a/contrib/libs/croaring/include/roaring/roaring_version.h
+++ b/contrib/libs/croaring/include/roaring/roaring_version.h
@@ -2,11 +2,11 @@
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
#ifndef ROARING_INCLUDE_ROARING_VERSION
#define ROARING_INCLUDE_ROARING_VERSION
-#define ROARING_VERSION "4.2.0"
+#define ROARING_VERSION "4.2.1"
enum {
ROARING_VERSION_MAJOR = 4,
ROARING_VERSION_MINOR = 2,
- ROARING_VERSION_REVISION = 0
+ ROARING_VERSION_REVISION = 1
};
#endif // ROARING_INCLUDE_ROARING_VERSION
// clang-format on \ No newline at end of file
diff --git a/contrib/libs/croaring/src/containers/bitset.c b/contrib/libs/croaring/src/containers/bitset.c
index 7b84af82ec..7a38d072b3 100644
--- a/contrib/libs/croaring/src/containers/bitset.c
+++ b/contrib/libs/croaring/src/containers/bitset.c
@@ -904,7 +904,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
} \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
const bitset_container_t *src_2) { \
- printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \
+ const uint64_t * __restrict__ words_1 = src_1->words; \
const uint64_t * __restrict__ words_2 = src_2->words; \
int32_t sum = 0; \
for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
diff --git a/contrib/libs/croaring/ya.make b/contrib/libs/croaring/ya.make
index 78b8b40c9d..63e87a8b2a 100644
--- a/contrib/libs/croaring/ya.make
+++ b/contrib/libs/croaring/ya.make
@@ -10,9 +10,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(4.2.0)
+VERSION(4.2.1)
-ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.2.0.tar.gz)
+ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.2.1.tar.gz)
ADDINCL(
GLOBAL contrib/libs/croaring/include
diff --git a/contrib/python/clickhouse-connect/.dist-info/METADATA b/contrib/python/clickhouse-connect/.dist-info/METADATA
index 24f7a78836..bb928a4bc2 100644
--- a/contrib/python/clickhouse-connect/.dist-info/METADATA
+++ b/contrib/python/clickhouse-connect/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: clickhouse-connect
-Version: 0.8.1
+Version: 0.8.2
Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
Home-page: https://github.com/ClickHouse/clickhouse-connect
Author: ClickHouse Inc.
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
index 398cfc4c74..c400d68132 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/__version__.py
@@ -1 +1 @@
-version = '0.8.1'
+version = '0.8.2'
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
index fe11c27883..d6b84885e4 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py
@@ -59,9 +59,15 @@ class Client(ABC):
"""
self.query_limit = coerce_int(query_limit)
self.query_retries = coerce_int(query_retries)
+ if database and not database == '__default__':
+ self.database = database
if show_clickhouse_errors is not None:
self.show_clickhouse_errors = coerce_bool(show_clickhouse_errors)
self.server_host_name = server_host_name
+ self.uri = uri
+ self._init_common_settings(apply_server_timezone)
+
+ def _init_common_settings(self, apply_server_timezone:Optional[Union[str, bool]] ):
self.server_tz, dst_safe = pytz.UTC, True
self.server_version, server_tz = \
tuple(self.command('SELECT version(), timezone()', use_database=False))
@@ -83,8 +89,7 @@ class Client(ABC):
readonly = common.get_setting('readonly')
server_settings = self.query(f'SELECT name, value, {readonly} as readonly FROM system.settings LIMIT 10000')
self.server_settings = {row['name']: SettingDef(**row) for row in server_settings.named_results()}
- if database and not database == '__default__':
- self.database = database
+
if self.min_version(CH_VERSION_WITH_PROTOCOL):
# Unfortunately we have to validate that the client protocol version is actually used by ClickHouse
# since the query parameter could be stripped off (in particular, by CHProxy)
@@ -95,7 +100,9 @@ class Client(ABC):
self.protocol_version = PROTOCOL_VERSION_WITH_LOW_CARD
if self._setting_status('date_time_input_format').is_writable:
self.set_client_setting('date_time_input_format', 'best_effort')
- self.uri = uri
+ if self._setting_status('allow_experimental_json_type').is_set:
+ self.set_client_setting('cast_string_to_dynamic_use_inference', '1')
+
def _validate_settings(self, settings: Optional[Dict[str, Any]]) -> Dict[str, str]:
"""
@@ -655,7 +662,8 @@ class Client(ABC):
settings=settings, context=context)
def insert_arrow(self, table: str,
- arrow_table, database: str = None,
+ arrow_table,
+ database: str = None,
settings: Optional[Dict] = None) -> QuerySummary:
"""
Insert a PyArrow table DataFrame into ClickHouse using raw Arrow format
@@ -666,7 +674,8 @@ class Client(ABC):
:return: QuerySummary with summary information, throws exception if insert fails
"""
full_table = table if '.' in table or not database else f'{database}.{table}'
- column_names, insert_block = arrow_buffer(arrow_table)
+ compression = self.write_compression if self.write_compression in ('zstd', 'lz4') else None
+ column_names, insert_block = arrow_buffer(arrow_table, compression)
return self.raw_insert(full_table, column_names, insert_block, settings, 'Arrow')
def create_insert_context(self,
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
index 58b5460a59..558d66f614 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/httputil.py
@@ -244,7 +244,8 @@ class ResponseSource:
else:
chunk = chunks.popleft()
current_size -= len(chunk)
- yield chunk
+ if chunk:
+ yield chunk
self.gen = buffered()
diff --git a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
index 54edbeff09..bd10270e71 100644
--- a/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
+++ b/contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py
@@ -374,9 +374,12 @@ def to_arrow_batches(buffer: IOBase) -> StreamContext:
return StreamContext(buffer, reader)
-def arrow_buffer(table) -> Tuple[Sequence[str], bytes]:
+def arrow_buffer(table, compression: Optional[str] = None) -> Tuple[Sequence[str], bytes]:
pyarrow = check_arrow()
+ options = None
+ if compression in ('zstd', 'lz4'):
+ options = pyarrow.ipc.IpcWriteOptions(compression=pyarrow.Codec(compression=compression))
sink = pyarrow.BufferOutputStream()
- with pyarrow.RecordBatchFileWriter(sink, table.schema) as writer:
+ with pyarrow.RecordBatchFileWriter(sink, table.schema, options=options) as writer:
writer.write(table)
return table.schema.names, sink.getvalue()
diff --git a/contrib/python/clickhouse-connect/ya.make b/contrib/python/clickhouse-connect/ya.make
index e594301105..89d942df53 100644
--- a/contrib/python/clickhouse-connect/ya.make
+++ b/contrib/python/clickhouse-connect/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.8.1)
+VERSION(0.8.2)
LICENSE(Apache-2.0)