1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
|
import asyncio
import io
import os
from concurrent.futures.thread import ThreadPoolExecutor
from datetime import tzinfo
from typing import Optional, Union, Dict, Any, Sequence, Iterable, Generator, BinaryIO
from clickhouse_connect.driver.client import Client
from clickhouse_connect.driver.common import StreamContext
from clickhouse_connect.driver.httpclient import HttpClient
from clickhouse_connect.driver.external import ExternalData
from clickhouse_connect.driver.query import QueryContext, QueryResult
from clickhouse_connect.driver.summary import QuerySummary
from clickhouse_connect.datatypes.base import ClickHouseType
from clickhouse_connect.driver.insert import InsertContext
# pylint: disable=too-many-public-methods,too-many-instance-attributes,too-many-arguments,too-many-positional-arguments,too-many-locals
class AsyncClient:
    """
    AsyncClient is a wrapper around the ClickHouse Client object that allows for async calls to the ClickHouse server.
    Internally, each of the methods that uses IO is wrapped in a call to EventLoop.run_in_executor, using a
    ThreadPoolExecutor owned by this object.  Methods that only touch local state (settings, query contexts)
    are plain synchronous calls that delegate directly to the wrapped client.
    """

    def __init__(self, *, client: Client, executor_threads: int = 0):
        """
        :param client: The synchronous Client that performs the actual work
        :param executor_threads: Maximum number of worker threads for the internal executor.  If 0, the
          ThreadPoolExecutor default of min(32, cpu_count + 4) is used
        """
        if isinstance(client, HttpClient):
            # Advertise in the User-Agent header that the client is being driven asynchronously
            client.headers['User-Agent'] = client.headers['User-Agent'].replace('mode:sync;', 'mode:async;')
        self.client = client
        if executor_threads == 0:
            executor_threads = min(32, (os.cpu_count() or 1) + 4)  # Mimic the default behavior
        self.executor = ThreadPoolExecutor(max_workers=executor_threads)

    async def _run_in_executor(self, func, **kwargs):
        """
        Run a blocking call of the wrapped client on the internal thread pool and await its result
        :param func: Callable to execute (normally a bound method of the wrapped client)
        :param kwargs: Keyword arguments forwarded unchanged to func
        :return: The value returned by func.  An exception raised by func is re-raised in the awaiting coroutine
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self.executor, lambda: func(**kwargs))

    def set_client_setting(self, key, value):
        """
        Set a clickhouse setting for the client after initialization.  If a setting is not recognized by ClickHouse,
        or the setting is identified as "read_only", this call will either throw a Programming exception or attempt
        to send the setting anyway based on the common setting 'invalid_setting_action'.
        :param key: ClickHouse setting name
        :param value: ClickHouse setting value
        """
        self.client.set_client_setting(key=key, value=value)

    def get_client_setting(self, key) -> Optional[str]:
        """
        :param key: The setting key
        :return: The string value of the setting, if it exists, or None
        """
        return self.client.get_client_setting(key=key)

    def min_version(self, version_str: str) -> bool:
        """
        Determine whether the connected server is at least the submitted version.  The comparison itself
        (including the handling of Altinity Stable versions like 22.8.15.25.altinitystable) is performed by
        the wrapped client.
        :param version_str: A version string consisting of up to 4 integers delimited by dots
        :return: True if the server version is at least version_str, False otherwise
        """
        return self.client.min_version(version_str)

    def close(self):
        """
        Close the connection to the server/deallocate the wrapped client and release the worker threads of the
        internal executor.  After this call no further async methods can be scheduled on this object.
        """
        try:
            self.client.close()
        finally:
            # wait=False keeps close() non-blocking, so it is safe to call from the event loop thread.
            # Calls that are already running are allowed to finish; idle worker threads exit.
            self.executor.shutdown(wait=False)

    async def query(self,
                    query: Optional[str] = None,
                    parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                    settings: Optional[Dict[str, Any]] = None,
                    query_formats: Optional[Dict[str, str]] = None,
                    column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
                    encoding: Optional[str] = None,
                    use_none: Optional[bool] = None,
                    column_oriented: Optional[bool] = None,
                    use_numpy: Optional[bool] = None,
                    max_str_len: Optional[int] = None,
                    context: QueryContext = None,
                    query_tz: Optional[Union[str, tzinfo]] = None,
                    column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                    external_data: Optional[ExternalData] = None) -> QueryResult:
        """
        Main query method for SELECT, DESCRIBE and other SQL statements that return a result matrix.
        For parameters, see the create_query_context method.
        :return: QueryResult -- data and metadata from response
        """
        return await self._run_in_executor(self.client.query, query=query, parameters=parameters,
                                           settings=settings, query_formats=query_formats,
                                           column_formats=column_formats, encoding=encoding, use_none=use_none,
                                           column_oriented=column_oriented, use_numpy=use_numpy,
                                           max_str_len=max_str_len, context=context, query_tz=query_tz,
                                           column_tzs=column_tzs, external_data=external_data)

    async def query_column_block_stream(self,
                                        query: Optional[str] = None,
                                        parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                                        settings: Optional[Dict[str, Any]] = None,
                                        query_formats: Optional[Dict[str, str]] = None,
                                        column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
                                        encoding: Optional[str] = None,
                                        use_none: Optional[bool] = None,
                                        context: QueryContext = None,
                                        query_tz: Optional[Union[str, tzinfo]] = None,
                                        column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                                        external_data: Optional[ExternalData] = None) -> StreamContext:
        """
        Variation of main query method that returns a stream of column oriented blocks.
        For parameters, see the create_query_context method.
        :return: StreamContext -- Iterable stream context that returns column oriented blocks
        """
        return await self._run_in_executor(self.client.query_column_block_stream, query=query,
                                           parameters=parameters, settings=settings,
                                           query_formats=query_formats, column_formats=column_formats,
                                           encoding=encoding, use_none=use_none, context=context,
                                           query_tz=query_tz, column_tzs=column_tzs,
                                           external_data=external_data)

    async def query_row_block_stream(self,
                                     query: Optional[str] = None,
                                     parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                                     settings: Optional[Dict[str, Any]] = None,
                                     query_formats: Optional[Dict[str, str]] = None,
                                     column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
                                     encoding: Optional[str] = None,
                                     use_none: Optional[bool] = None,
                                     context: QueryContext = None,
                                     query_tz: Optional[Union[str, tzinfo]] = None,
                                     column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                                     external_data: Optional[ExternalData] = None) -> StreamContext:
        """
        Variation of main query method that returns a stream of row oriented blocks.
        For parameters, see the create_query_context method.
        :return: StreamContext -- Iterable stream context that returns blocks of rows
        """
        return await self._run_in_executor(self.client.query_row_block_stream, query=query,
                                           parameters=parameters, settings=settings,
                                           query_formats=query_formats, column_formats=column_formats,
                                           encoding=encoding, use_none=use_none, context=context,
                                           query_tz=query_tz, column_tzs=column_tzs,
                                           external_data=external_data)

    async def query_rows_stream(self,
                                query: Optional[str] = None,
                                parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                                settings: Optional[Dict[str, Any]] = None,
                                query_formats: Optional[Dict[str, str]] = None,
                                column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
                                encoding: Optional[str] = None,
                                use_none: Optional[bool] = None,
                                context: QueryContext = None,
                                query_tz: Optional[Union[str, tzinfo]] = None,
                                column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                                external_data: Optional[ExternalData] = None) -> StreamContext:
        """
        Variation of main query method that returns a stream of row oriented blocks.
        For parameters, see the create_query_context method.
        :return: StreamContext -- Iterable stream context that returns blocks of rows
        """
        return await self._run_in_executor(self.client.query_rows_stream, query=query,
                                           parameters=parameters, settings=settings,
                                           query_formats=query_formats, column_formats=column_formats,
                                           encoding=encoding, use_none=use_none, context=context,
                                           query_tz=query_tz, column_tzs=column_tzs,
                                           external_data=external_data)

    async def raw_query(self,
                        query: str,
                        parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                        settings: Optional[Dict[str, Any]] = None,
                        fmt: str = None,
                        use_database: bool = True,
                        external_data: Optional[ExternalData] = None) -> bytes:
        """
        Query method that simply returns the raw ClickHouse format bytes.
        :param query: Query statement/format string
        :param parameters: Optional dictionary used to format the query
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param fmt: ClickHouse output format
        :param use_database: Send the database parameter to ClickHouse so the command will be executed in the client
          database context
        :param external_data: External data to send with the query
        :return: bytes representing raw ClickHouse return value based on format
        """
        return await self._run_in_executor(self.client.raw_query, query=query, parameters=parameters,
                                           settings=settings, fmt=fmt, use_database=use_database,
                                           external_data=external_data)

    async def raw_stream(self, query: str,
                         parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                         settings: Optional[Dict[str, Any]] = None,
                         fmt: str = None,
                         use_database: bool = True,
                         external_data: Optional[ExternalData] = None) -> io.IOBase:
        """
        Query method that returns the result as an io.IOBase iterator.
        :param query: Query statement/format string
        :param parameters: Optional dictionary used to format the query
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param fmt: ClickHouse output format
        :param use_database: Send the database parameter to ClickHouse so the command will be executed in the client
          database context
        :param external_data: External data to send with the query
        :return: io.IOBase stream/iterator for the result
        """
        return await self._run_in_executor(self.client.raw_stream, query=query, parameters=parameters,
                                           settings=settings, fmt=fmt, use_database=use_database,
                                           external_data=external_data)

    async def query_np(self,
                       query: Optional[str] = None,
                       parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                       settings: Optional[Dict[str, Any]] = None,
                       query_formats: Optional[Dict[str, str]] = None,
                       column_formats: Optional[Dict[str, str]] = None,
                       encoding: Optional[str] = None,
                       use_none: Optional[bool] = None,
                       max_str_len: Optional[int] = None,
                       context: QueryContext = None,
                       external_data: Optional[ExternalData] = None):
        """
        Query method that returns the results as a numpy array.
        For parameter values, see the create_query_context method.
        :return: Numpy array representing the result set
        """
        return await self._run_in_executor(self.client.query_np, query=query, parameters=parameters,
                                           settings=settings, query_formats=query_formats,
                                           column_formats=column_formats, encoding=encoding,
                                           use_none=use_none, max_str_len=max_str_len, context=context,
                                           external_data=external_data)

    async def query_np_stream(self,
                              query: Optional[str] = None,
                              parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                              settings: Optional[Dict[str, Any]] = None,
                              query_formats: Optional[Dict[str, str]] = None,
                              column_formats: Optional[Dict[str, str]] = None,
                              encoding: Optional[str] = None,
                              use_none: Optional[bool] = None,
                              max_str_len: Optional[int] = None,
                              context: QueryContext = None,
                              external_data: Optional[ExternalData] = None) -> StreamContext:
        """
        Query method that returns the results as a stream of numpy arrays.
        For parameter values, see the create_query_context method.
        :return: Generator that yields a numpy array per block representing the result set
        """
        return await self._run_in_executor(self.client.query_np_stream, query=query, parameters=parameters,
                                           settings=settings, query_formats=query_formats,
                                           column_formats=column_formats, encoding=encoding,
                                           use_none=use_none, max_str_len=max_str_len, context=context,
                                           external_data=external_data)

    async def query_df(self,
                       query: Optional[str] = None,
                       parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                       settings: Optional[Dict[str, Any]] = None,
                       query_formats: Optional[Dict[str, str]] = None,
                       column_formats: Optional[Dict[str, str]] = None,
                       encoding: Optional[str] = None,
                       use_none: Optional[bool] = None,
                       max_str_len: Optional[int] = None,
                       use_na_values: Optional[bool] = None,
                       query_tz: Optional[str] = None,
                       column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                       context: QueryContext = None,
                       external_data: Optional[ExternalData] = None,
                       use_extended_dtypes: Optional[bool] = None):
        """
        Query method that returns the results as a pandas dataframe.
        For parameter values, see the create_query_context method.
        :return: Pandas dataframe representing the result set
        """
        return await self._run_in_executor(self.client.query_df, query=query, parameters=parameters,
                                           settings=settings, query_formats=query_formats,
                                           column_formats=column_formats, encoding=encoding,
                                           use_none=use_none, max_str_len=max_str_len,
                                           use_na_values=use_na_values, query_tz=query_tz,
                                           column_tzs=column_tzs, context=context,
                                           external_data=external_data,
                                           use_extended_dtypes=use_extended_dtypes)

    async def query_df_stream(self,
                              query: Optional[str] = None,
                              parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                              settings: Optional[Dict[str, Any]] = None,
                              query_formats: Optional[Dict[str, str]] = None,
                              column_formats: Optional[Dict[str, str]] = None,
                              encoding: Optional[str] = None,
                              use_none: Optional[bool] = None,
                              max_str_len: Optional[int] = None,
                              use_na_values: Optional[bool] = None,
                              query_tz: Optional[str] = None,
                              column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                              context: QueryContext = None,
                              external_data: Optional[ExternalData] = None,
                              use_extended_dtypes: Optional[bool] = None) -> StreamContext:
        """
        Query method that returns the results as a StreamContext.
        For parameter values, see the create_query_context method.
        :return: Generator that yields a Pandas dataframe per block representing the result set
        """
        return await self._run_in_executor(self.client.query_df_stream, query=query, parameters=parameters,
                                           settings=settings, query_formats=query_formats,
                                           column_formats=column_formats, encoding=encoding,
                                           use_none=use_none, max_str_len=max_str_len,
                                           use_na_values=use_na_values, query_tz=query_tz,
                                           column_tzs=column_tzs, context=context,
                                           external_data=external_data,
                                           use_extended_dtypes=use_extended_dtypes)

    def create_query_context(self,
                             query: Optional[Union[str, bytes]] = None,
                             parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                             settings: Optional[Dict[str, Any]] = None,
                             query_formats: Optional[Dict[str, str]] = None,
                             column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
                             encoding: Optional[str] = None,
                             use_none: Optional[bool] = None,
                             column_oriented: Optional[bool] = None,
                             use_numpy: Optional[bool] = False,
                             max_str_len: Optional[int] = 0,
                             context: Optional[QueryContext] = None,
                             query_tz: Optional[Union[str, tzinfo]] = None,
                             column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
                             use_na_values: Optional[bool] = None,
                             streaming: bool = False,
                             as_pandas: bool = False,
                             external_data: Optional[ExternalData] = None,
                             use_extended_dtypes: Optional[bool] = None) -> QueryContext:
        """
        Creates or updates a reusable QueryContext object.  This is a synchronous call that delegates directly
        to the wrapped client (it is not dispatched to the executor).
        :param query: Query statement/format string
        :param parameters: Optional dictionary used to format the query
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param query_formats: See QueryContext __init__ docstring
        :param column_formats: See QueryContext __init__ docstring
        :param encoding: See QueryContext __init__ docstring
        :param use_none: Use None for ClickHouse NULL instead of default values.  Note that using None in Numpy
          arrays will force the numpy array dtype to 'object', which is often inefficient.  This effect also
          will impact the performance of Pandas dataframes.
        :param column_oriented: Deprecated.  Controls orientation of the QueryResult result_set property
        :param use_numpy: Return QueryResult columns as one-dimensional numpy arrays
        :param max_str_len: Limit returned ClickHouse String values to this length, which allows a Numpy
          structured array even with ClickHouse variable length String columns.  If 0, Numpy arrays for
          String columns will always be object arrays
        :param context: An existing QueryContext to be updated with any provided parameter values
        :param query_tz: Either a string or a pytz tzinfo object.  (Strings will be converted to tzinfo objects).
          Values for any DateTime or DateTime64 column in the query will be converted to Python datetime.datetime
          objects with the selected timezone
        :param column_tzs: A dictionary of column names to tzinfo objects (or strings that will be converted to
          tzinfo objects).  The timezone will be applied to datetime objects returned in the query
        :param use_na_values: Deprecated alias for use_extended_dtypes
        :param as_pandas: Return the result columns as pandas.Series objects
        :param streaming: Marker used to correctly configure streaming queries
        :param external_data: ClickHouse "external data" to send with query
        :param use_extended_dtypes: Only relevant to Pandas Dataframe queries.  Use Pandas "missing types", such as
          pandas.NA and pandas.NaT for ClickHouse NULL values, as well as extended Pandas dtypes such as IntegerArray
          and StringArray.  Defaulted to True for query_df methods
        :return: Reusable QueryContext
        """
        return self.client.create_query_context(query=query, parameters=parameters, settings=settings,
                                                query_formats=query_formats, column_formats=column_formats,
                                                encoding=encoding, use_none=use_none,
                                                column_oriented=column_oriented,
                                                use_numpy=use_numpy, max_str_len=max_str_len, context=context,
                                                query_tz=query_tz, column_tzs=column_tzs,
                                                use_na_values=use_na_values,
                                                streaming=streaming, as_pandas=as_pandas,
                                                external_data=external_data,
                                                use_extended_dtypes=use_extended_dtypes)

    async def query_arrow(self,
                          query: str,
                          parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                          settings: Optional[Dict[str, Any]] = None,
                          use_strings: Optional[bool] = None,
                          external_data: Optional[ExternalData] = None):
        """
        Query method using the ClickHouse Arrow format to return a PyArrow table
        :param query: Query statement/format string
        :param parameters: Optional dictionary used to format the query
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
        :param external_data: ClickHouse "external data" to send with query
        :return: PyArrow.Table
        """
        return await self._run_in_executor(self.client.query_arrow, query=query, parameters=parameters,
                                           settings=settings, use_strings=use_strings,
                                           external_data=external_data)

    async def query_arrow_stream(self,
                                 query: str,
                                 parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                                 settings: Optional[Dict[str, Any]] = None,
                                 use_strings: Optional[bool] = None,
                                 external_data: Optional[ExternalData] = None) -> StreamContext:
        """
        Query method that returns the results as a stream of Arrow tables
        :param query: Query statement/format string
        :param parameters: Optional dictionary used to format the query
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
        :param external_data: ClickHouse "external data" to send with query
        :return: Generator that yields a PyArrow.Table per block representing the result set
        """
        return await self._run_in_executor(self.client.query_arrow_stream, query=query, parameters=parameters,
                                           settings=settings, use_strings=use_strings,
                                           external_data=external_data)

    async def command(self,
                      cmd: str,
                      parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                      data: Union[str, bytes] = None,
                      settings: Dict[str, Any] = None,
                      use_database: bool = True,
                      external_data: Optional[ExternalData] = None) -> Union[str, int, Sequence[str], QuerySummary]:
        """
        Client method that returns a single value instead of a result set
        :param cmd: ClickHouse query/command as a python format string
        :param parameters: Optional dictionary of key/values pairs to be formatted
        :param data: Optional 'data' for the command (for INSERT INTO in particular)
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param use_database: Send the database parameter to ClickHouse so the command will be executed in the client
          database context.  Otherwise, no database will be specified with the command.  This is useful for
          determining the default user database
        :param external_data: ClickHouse "external data" to send with command/query
        :return: Decoded response from ClickHouse as either a string, int, or sequence of strings, or QuerySummary
          if no data returned
        """
        return await self._run_in_executor(self.client.command, cmd=cmd, parameters=parameters, data=data,
                                           settings=settings, use_database=use_database,
                                           external_data=external_data)

    async def ping(self) -> bool:
        """
        Validate the connection, does not throw an Exception (see debug logs)
        :return: ClickHouse server is up and reachable
        """
        return await self._run_in_executor(self.client.ping)

    async def insert(self,
                     table: Optional[str] = None,
                     data: Sequence[Sequence[Any]] = None,
                     column_names: Union[str, Iterable[str]] = '*',
                     database: Optional[str] = None,
                     column_types: Sequence[ClickHouseType] = None,
                     column_type_names: Sequence[str] = None,
                     column_oriented: bool = False,
                     settings: Optional[Dict[str, Any]] = None,
                     context: InsertContext = None) -> QuerySummary:
        """
        Method to insert multiple rows/data matrix of native Python objects.  If context is specified arguments
        other than data are ignored
        :param table: Target table
        :param data: Sequence of sequences of Python data
        :param column_names: Ordered list of column names or '*' if column types should be retrieved from the
          ClickHouse table definition
        :param database: Target database -- will use client default database if not specified.
        :param column_types: ClickHouse column types.  If set then column data does not need to be retrieved from
          the server
        :param column_type_names: ClickHouse column type names.  If set then column data does not need to be
          retrieved from the server
        :param column_oriented: If true the data is already "pivoted" in column form
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param context: Optional reusable insert context to allow repeated inserts into the same table with
          different data batches
        :return: QuerySummary with summary information, throws exception if insert fails
        """
        return await self._run_in_executor(self.client.insert, table=table, data=data,
                                           column_names=column_names, database=database,
                                           column_types=column_types, column_type_names=column_type_names,
                                           column_oriented=column_oriented, settings=settings,
                                           context=context)

    async def insert_df(self, table: str = None,
                        df=None,
                        database: Optional[str] = None,
                        settings: Optional[Dict] = None,
                        column_names: Optional[Sequence[str]] = None,
                        column_types: Sequence[ClickHouseType] = None,
                        column_type_names: Sequence[str] = None,
                        context: InsertContext = None) -> QuerySummary:
        """
        Insert a pandas DataFrame into ClickHouse.  If context is specified arguments other than df are ignored
        :param table: ClickHouse table
        :param df: two-dimensional pandas dataframe
        :param database: Optional ClickHouse database
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param column_names: An optional list of ClickHouse column names.  If not set, the DataFrame column names
          will be used
        :param column_types: ClickHouse column types.  If set then column data does not need to be retrieved from
          the server
        :param column_type_names: ClickHouse column type names.  If set then column data does not need to be
          retrieved from the server
        :param context: Optional reusable insert context to allow repeated inserts into the same table with
          different data batches
        :return: QuerySummary with summary information, throws exception if insert fails
        """
        return await self._run_in_executor(self.client.insert_df, table=table, df=df, database=database,
                                           settings=settings, column_names=column_names,
                                           column_types=column_types, column_type_names=column_type_names,
                                           context=context)

    async def insert_arrow(self, table: str,
                           arrow_table, database: str = None,
                           settings: Optional[Dict] = None) -> QuerySummary:
        """
        Insert a PyArrow table DataFrame into ClickHouse using raw Arrow format
        :param table: ClickHouse table
        :param arrow_table: PyArrow Table object
        :param database: Optional ClickHouse database
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :return: QuerySummary with summary information, throws exception if insert fails
        """
        return await self._run_in_executor(self.client.insert_arrow, table=table, arrow_table=arrow_table,
                                           database=database, settings=settings)

    async def create_insert_context(self,
                                    table: str,
                                    column_names: Optional[Union[str, Sequence[str]]] = None,
                                    database: Optional[str] = None,
                                    column_types: Sequence[ClickHouseType] = None,
                                    column_type_names: Sequence[str] = None,
                                    column_oriented: bool = False,
                                    settings: Optional[Dict[str, Any]] = None,
                                    data: Optional[Sequence[Sequence[Any]]] = None) -> InsertContext:
        """
        Builds a reusable insert context to hold state for a duration of an insert
        :param table: Target table
        :param column_names: Optional ordered list of column names.  If not set, all columns ('*') will be assumed
          in the order specified by the table definition
        :param database: Target database -- will use client default database if not specified
        :param column_types: ClickHouse column types.  Optional Sequence of ClickHouseType objects.  If neither column
          types nor column type names are set, actual column types will be retrieved from the server.
        :param column_type_names: ClickHouse column type names.  Specified column types by name string
        :param column_oriented: If true the data is already "pivoted" in column form
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param data: Initial dataset for insert
        :return: Reusable insert context
        """
        return await self._run_in_executor(self.client.create_insert_context, table=table,
                                           column_names=column_names, database=database,
                                           column_types=column_types, column_type_names=column_type_names,
                                           column_oriented=column_oriented, settings=settings, data=data)

    async def data_insert(self, context: InsertContext) -> QuerySummary:
        """
        Execute the insert described by an InsertContext using the wrapped client's implementation
        :param context: InsertContext parameter object
        :return: The QuerySummary returned by the wrapped client, throws an exception if the insert fails
        """
        return await self._run_in_executor(self.client.data_insert, context=context)

    async def raw_insert(self, table: str,
                         column_names: Optional[Sequence[str]] = None,
                         insert_block: Union[str, bytes, Generator[bytes, None, None], BinaryIO] = None,
                         settings: Optional[Dict] = None,
                         fmt: Optional[str] = None,
                         compression: Optional[str] = None) -> QuerySummary:
        """
        Insert data already formatted in a bytes object
        :param table: Table name (whether qualified with the database name or not)
        :param column_names: Sequence of column names
        :param insert_block: Binary or string data already in a recognized ClickHouse format
        :param settings: Optional dictionary of ClickHouse settings (key/string values)
        :param fmt: Valid clickhouse format
        :param compression: Recognized ClickHouse `Accept-Encoding` header compression value
        :return: QuerySummary with summary information
        """
        return await self._run_in_executor(self.client.raw_insert, table=table, column_names=column_names,
                                           insert_block=insert_block, settings=settings, fmt=fmt,
                                           compression=compression)
|