aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvgenii Beloshabskii <beloshabskiy@gmail.com>2022-04-12 14:41:10 +0300
committerEvgenii Beloshabskii <beloshabskiy@gmail.com>2022-04-12 14:41:10 +0300
commit28e9004b809525a9280a41cad133f8fec8ae8314 (patch)
tree7ee1b67dec36d25f01fb07b32849f5cd5b210ce8
parent70ba69a2375d52936f770c56cc0ca25564316282 (diff)
downloadydb-28e9004b809525a9280a41cad133f8fec8ae8314.tar.gz
YQ-1027: Support Parquet
ref:d888b6b31b88763402f0b7de0c2acfc41cf592bb
-rwxr-xr-x.github/check_dirs.sh33
-rw-r--r--.github/workflows/allowed_dirs.yml17
-rw-r--r--ydb/library/yql/providers/common/provider/yql_provider.cpp5
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt2
-rw-r--r--ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp24
5 files changed, 18 insertions, 63 deletions
diff --git a/.github/check_dirs.sh b/.github/check_dirs.sh
deleted file mode 100755
index ba1e685a37..0000000000
--- a/.github/check_dirs.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-GIT_URL=$1
-
-set -e
-declare -A top_dirs=(
- [ydb/]=1,
- [util/]=1,
- [build/]=1,
- [contrib/]=1,
- [certs/]=1,
- [cmake/]=1,
- [.git/]=1,
- [.github/]=1,
- [library/]=1,
- [tools/]=1,
-)
-
-cd $GIT_URL
-
-shopt -s dotglob
-shopt -s nullglob
-array=(*/)
-
-for dir in "${array[@]}"
-do
- if [[ ! ${top_dirs[$dir]} ]]
- then
- echo "$dir is not allowed root level directory."
- exit 1
- fi
-done
-
diff --git a/.github/workflows/allowed_dirs.yml b/.github/workflows/allowed_dirs.yml
deleted file mode 100644
index c2b997abec..0000000000
--- a/.github/workflows/allowed_dirs.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: CheckAllowedDirs
-
-on:
- push:
- branches: [ main ]
- pull_request:
- branches: [ main ]
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v3
-
- - name: Check dirs
- run: ${{github.workspace}}/.github/check_dirs.sh ${{github.workspace}}
diff --git a/ydb/library/yql/providers/common/provider/yql_provider.cpp b/ydb/library/yql/providers/common/provider/yql_provider.cpp
index cca0d60ed3..426ff16596 100644
--- a/ydb/library/yql/providers/common/provider/yql_provider.cpp
+++ b/ydb/library/yql/providers/common/provider/yql_provider.cpp
@@ -22,13 +22,14 @@ namespace NCommon {
using namespace NNodes;
namespace {
- std::array<std::string_view, 6> Formats = {
+ std::array<std::string_view, 7> Formats = {
"csv_with_names"sv,
"tsv_with_names"sv,
"json_list"sv,
"json"sv,
"raw"sv,
- "json_each_row"sv
+ "json_each_row"sv,
+ "parquet"sv
};
std::array<std::string_view, 6> Compressions = {
"gzip"sv,
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt b/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
index 1c3c970f01..fa1e5eecac 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
+++ b/ydb/library/yql/udfs/common/clickhouse/client/CMakeLists.linux.txt
@@ -40,7 +40,7 @@ target_compile_options(clickhouse_client_udf.global PRIVATE
-DARCADIA_BUILD
-DOS_LINUX
-DUSE_ARROW=0
- -DUSE_PARQUET=0
+ -DUSE_PARQUET=1
-DUSE_ORC=0
-DUSE_AVRO=0
-DUSE_UNWIND=0
diff --git a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index a0b92f98ca..8d56153551 100644
--- a/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/ydb/library/yql/udfs/common/clickhouse/client/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -124,16 +124,20 @@ void ParquetBlockInputFormat::prepareReader()
void registerInputFormatProcessorParquet(FormatFactory &factory)
{
- factory.registerInputFormatProcessor(
- "Parquet",
- [](ReadBuffer &buf,
- const Block &sample,
- const RowInputFormatParams &,
- const FormatSettings & settings)
- {
- return std::make_shared<ParquetBlockInputFormat>(buf, sample, settings);
- });
- factory.markFormatAsColumnOriented("Parquet");
+ for (const auto& name : {"Parquet", "parquet"})
+ {
+ factory.registerInputFormatProcessor(
+ name,
+ [](ReadBuffer &buf,
+ const Block &sample,
+ const RowInputFormatParams &,
+ const FormatSettings & settings)
+ {
+ return std::make_shared<ParquetBlockInputFormat>(buf, sample, settings);
+ });
+ factory.markFormatAsColumnOriented(name);
+ }
+
}
}