aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorОлег <150132506+iddqdex@users.noreply.github.com>2025-04-18 15:41:40 +0300
committerGitHub <noreply@github.com>2025-04-18 12:41:40 +0000
commita0b13519c60db31ac0ec1bb377a07172f8a8b95a (patch)
treea0975dc6814e2f3bbfc45a7ac7792201e339523a
parent68c43aed9070c11917f3b1bb79f11197910d4a6a (diff)
downloadydb-a0b13519c60db31ac0ec1bb377a07172f8a8b95a.tar.gz
change partitioning in tpch (#17416)
Co-authored-by: Nikolay Perfilov <pnv1@yandex-team.ru>
-rw-r--r--ydb/apps/ydb/CHANGELOG.md1
-rw-r--r--ydb/library/workload/benchmark_base/workload.cpp28
-rw-r--r--ydb/library/workload/benchmark_base/workload.h1
-rw-r--r--ydb/library/workload/tpc_base/tpc_base.cpp4
-rw-r--r--ydb/library/workload/tpcds/data_generator.cpp2
-rw-r--r--ydb/library/workload/tpch/data_generator.cpp2
-rw-r--r--ydb/library/workload/tpch/tpch.cpp4
-rw-r--r--ydb/library/workload/tpch/tpch.h1
-rw-r--r--ydb/library/workload/tpch/tpch_schema.yaml2
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/result.json6
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column645
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column137
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column2
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal2
-rw-r--r--ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb2
-rw-r--r--ydb/tests/functional/benchmarks_init/test_init.py3
16 files changed, 828 insertions, 14 deletions
diff --git a/ydb/apps/ydb/CHANGELOG.md b/ydb/apps/ydb/CHANGELOG.md
index 637e78e4e2b..8fe9b9c2cdd 100644
--- a/ydb/apps/ydb/CHANGELOG.md
+++ b/ydb/apps/ydb/CHANGELOG.md
@@ -1,3 +1,4 @@
+* Added `--scale` option to `ydb workload tpch init` and `ydb workload tpcds init` commands. Sets the percentage of the benchmark's data size and workload to use, relative to full scale.
* Added "--no-discovery" option. It allows skipping discovery and using a user-provided endpoint to connect to the YDB cluster.
* Added `--retries` to `ydb workload <clickbench|tpch|tpcds> run` command.
* Added `--partition-size` param to `ydb workload <clickbench/tpcds/tpch> init`.
diff --git a/ydb/library/workload/benchmark_base/workload.cpp b/ydb/library/workload/benchmark_base/workload.cpp
index 7e5d09aaffe..cb413f4cca0 100644
--- a/ydb/library/workload/benchmark_base/workload.cpp
+++ b/ydb/library/workload/benchmark_base/workload.cpp
@@ -35,6 +35,22 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
return settings.SerializeAsString();
} ();
+namespace {
+
+ TString KeysList(const NJson::TJsonValue& table, const TString& key) {
+ TVector<TStringBuf> keysV;
+ for (const auto& k: table[key].GetArray()) {
+ keysV.emplace_back(k.GetString());
+ }
+ return JoinSeq(", ", keysV);
+ }
+
+}
+
+ui32 TWorkloadGeneratorBase::GetDefaultPartitionsCount(const TString& /*tableName*/) const {
+ return 64;
+}
+
void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const {
auto specialTypes = GetSpecialDataTypes();
specialTypes["string_type"] = Params.GetStringType();
@@ -65,11 +81,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
}
}
result << JoinSeq(",\n", columns);
- TVector<TStringBuf> keysV;
- for (const auto& k: table["primary_key"].GetArray()) {
- keysV.emplace_back(k.GetString());
- }
- const TString keys = JoinSeq(", ", keysV);
+ const auto keys = KeysList(table, "primary_key");
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << Endl;
} else {
@@ -78,7 +90,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
result << ")" << Endl;
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
- result << "PARTITION BY HASH (" << keys << ")" << Endl;
+ result << "PARTITION BY HASH (" << (table.Has("partition_by") ? KeysList(table, "partition_by") : keys) << ")" << Endl;
}
result << "WITH (" << Endl;
@@ -89,12 +101,12 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
break;
case TWorkloadBaseParams::EStoreType::Column:
result << " STORE = COLUMN," << Endl;
- result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
+ result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl;
break;
case TWorkloadBaseParams::EStoreType::Row:
result << " STORE = ROW," << Endl;
result << " AUTO_PARTITIONING_PARTITION_SIZE_MB = " << Params.GetPartitionSizeMb() << ", " << Endl;
- result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
+ result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl;
}
result << ");" << Endl;
}
diff --git a/ydb/library/workload/benchmark_base/workload.h b/ydb/library/workload/benchmark_base/workload.h
index 52cb0c2774c..6ddf1cbd917 100644
--- a/ydb/library/workload/benchmark_base/workload.h
+++ b/ydb/library/workload/benchmark_base/workload.h
@@ -53,6 +53,7 @@ protected:
virtual TString GetTablesYaml() const = 0;
virtual TSpecialDataTypes GetSpecialDataTypes() const = 0;
NJson::TJsonValue GetTablesJson() const;
+ virtual ui32 GetDefaultPartitionsCount(const TString& tableName) const;
THolder<TGeneratorStateProcessor> StateProcessor;
private:
diff --git a/ydb/library/workload/tpc_base/tpc_base.cpp b/ydb/library/workload/tpc_base/tpc_base.cpp
index 1a91da20851..0e02ee556ee 100644
--- a/ydb/library/workload/tpc_base/tpc_base.cpp
+++ b/ydb/library/workload/tpc_base/tpc_base.cpp
@@ -167,7 +167,7 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm
.StoreResult(&ExternalQueriesDir);
opts.AddLongOption( "syntax", "Query syntax [" + GetEnumAllNames<EQuerySyntax>() + "].")
.StoreResult(&Syntax).DefaultValue(Syntax);
- opts.AddLongOption("scale", "scale in percents")
+ opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
.DefaultValue(Scale).StoreResult(&Scale);
opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.")
.StoreResult(&FloatMode).DefaultValue(FloatMode);
@@ -177,6 +177,8 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm
case TWorkloadParams::ECommandType::Init:
opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.")
.StoreResult(&FloatMode).DefaultValue(FloatMode);
+ opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
+ .DefaultValue(Scale).StoreResult(&Scale);
break;
default:
break;
diff --git a/ydb/library/workload/tpcds/data_generator.cpp b/ydb/library/workload/tpcds/data_generator.cpp
index b15589ca550..495ba99f8d5 100644
--- a/ydb/library/workload/tpcds/data_generator.cpp
+++ b/ydb/library/workload/tpcds/data_generator.cpp
@@ -19,7 +19,7 @@ TTpcdsWorkloadDataInitializerGenerator::TTpcdsWorkloadDataInitializerGenerator(c
void TTpcdsWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) {
TWorkloadDataInitializerBase::ConfigureOpts(opts);
- opts.AddLongOption("scale", "scale in percents")
+ opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
.DefaultValue(Scale).StoreResult(&Scale);
opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n"
"Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys()))
diff --git a/ydb/library/workload/tpch/data_generator.cpp b/ydb/library/workload/tpch/data_generator.cpp
index 1fdbe46142b..24e2f74e8a9 100644
--- a/ydb/library/workload/tpch/data_generator.cpp
+++ b/ydb/library/workload/tpch/data_generator.cpp
@@ -11,7 +11,7 @@ TTpchWorkloadDataInitializerGenerator::TTpchWorkloadDataInitializerGenerator(con
void TTpchWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) {
TWorkloadDataInitializerBase::ConfigureOpts(opts);
- opts.AddLongOption("scale", "scale in percents")
+ opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
.DefaultValue(Scale).StoreResult(&Scale);
opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n"
"Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys()))
diff --git a/ydb/library/workload/tpch/tpch.cpp b/ydb/library/workload/tpch/tpch.cpp
index 183f3a0e7b1..13de9152902 100644
--- a/ydb/library/workload/tpch/tpch.cpp
+++ b/ydb/library/workload/tpch/tpch.cpp
@@ -29,6 +29,10 @@ TWorkloadGeneratorBase::TSpecialDataTypes TTpchWorkloadGenerator::GetSpecialData
}
}
+ui32 TTpchWorkloadGenerator::GetDefaultPartitionsCount(const TString& /*tableName*/) const {
+ return Params.GetScale() <= 10 ? 64 : 256;
+}
+
THolder<IWorkloadQueryGenerator> TTpchWorkloadParams::CreateGenerator() const {
return MakeHolder<TTpchWorkloadGenerator>(*this);
diff --git a/ydb/library/workload/tpch/tpch.h b/ydb/library/workload/tpch/tpch.h
index bfe5506c493..7e801ba73ec 100644
--- a/ydb/library/workload/tpch/tpch.h
+++ b/ydb/library/workload/tpch/tpch.h
@@ -19,6 +19,7 @@ public:
protected:
TString GetTablesYaml() const override;
TWorkloadGeneratorBase::TSpecialDataTypes GetSpecialDataTypes() const override;
+ ui32 GetDefaultPartitionsCount(const TString& tableName) const override;
private:
const TTpchWorkloadParams& Params;
diff --git a/ydb/library/workload/tpch/tpch_schema.yaml b/ydb/library/workload/tpch/tpch_schema.yaml
index 4041d510183..2bcfd8cdcb3 100644
--- a/ydb/library/workload/tpch/tpch_schema.yaml
+++ b/ydb/library/workload/tpch/tpch_schema.yaml
@@ -81,6 +81,8 @@ tables:
primary_key:
- l_orderkey
- l_linenumber
+ partition_by:
+ - l_orderkey
- name: nation
columns:
diff --git a/ydb/tests/functional/benchmarks_init/canondata/result.json b/ydb/tests/functional/benchmarks_init/canondata/result.json
index 4ae82132a23..ff54afb3c35 100644
--- a/ydb/tests/functional/benchmarks_init/canondata/result.json
+++ b/ydb/tests/functional/benchmarks_init/canondata/result.json
@@ -32,6 +32,9 @@
"test_init.TestClickbenchInit.test_s1_s3": {
"uri": "file://test_init.TestClickbenchInit.test_s1_s3/s1_s3"
},
+ "test_init.TestTpcdsInit.test_s100_column": {
+ "uri": "file://test_init.TestTpcdsInit.test_s100_column/s100_column"
+ },
"test_init.TestTpcdsInit.test_s1_column": {
"uri": "file://test_init.TestTpcdsInit.test_s1_column/s1_column"
},
@@ -47,6 +50,9 @@
"test_init.TestTpcdsInit.test_s1_s3": {
"uri": "file://test_init.TestTpcdsInit.test_s1_s3/s1_s3"
},
+ "test_init.TestTpchInit.test_s100_column": {
+ "uri": "file://test_init.TestTpchInit.test_s100_column/s100_column"
+ },
"test_init.TestTpchInit.test_s1_column": {
"uri": "file://test_init.TestTpchInit.test_s1_column/s1_column"
},
diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column
new file mode 100644
index 00000000000..158ec1509f1
--- /dev/null
+++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column
@@ -0,0 +1,645 @@
+Init tables ...
+--!syntax_v1
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer_address` (
+ ca_address_sk Int64 NOT NULL,
+ ca_address_id Utf8,
+ ca_street_number Utf8,
+ ca_street_name Utf8,
+ ca_street_type Utf8,
+ ca_suite_number Utf8,
+ ca_city Utf8,
+ ca_county Utf8,
+ ca_state Utf8,
+ ca_zip Utf8,
+ ca_country Utf8,
+ ca_gmt_offset Double,
+ ca_location_type Utf8,
+ PRIMARY KEY (ca_address_sk)
+)
+PARTITION BY HASH (ca_address_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer_demographics` (
+ cd_demo_sk Int64 NOT NULL,
+ cd_gender Utf8,
+ cd_marital_status Utf8,
+ cd_education_status Utf8,
+ cd_purchase_estimate Int64,
+ cd_credit_rating Utf8,
+ cd_dep_count Int64,
+ cd_dep_employed_count Int64,
+ cd_dep_college_count Int64,
+ PRIMARY KEY (cd_demo_sk)
+)
+PARTITION BY HASH (cd_demo_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/date_dim` (
+ d_date_sk Int64 NOT NULL,
+ d_date_id Utf8,
+ d_date Utf8,
+ d_month_seq Int64,
+ d_week_seq Int64,
+ d_quarter_seq Int64,
+ d_year Int64,
+ d_dow Int64,
+ d_moy Int64,
+ d_dom Int64,
+ d_qoy Int64,
+ d_fy_year Int64,
+ d_fy_quarter_seq Int64,
+ d_fy_week_seq Int64,
+ d_day_name Utf8,
+ d_quarter_name Utf8,
+ d_holiday Utf8,
+ d_weekend Utf8,
+ d_following_holiday Utf8,
+ d_first_dom Int64,
+ d_last_dom Int64,
+ d_same_day_ly Int64,
+ d_same_day_lq Int64,
+ d_current_day Utf8,
+ d_current_week Utf8,
+ d_current_month Utf8,
+ d_current_quarter Utf8,
+ d_current_year Utf8,
+ PRIMARY KEY (d_date_sk)
+)
+PARTITION BY HASH (d_date_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/warehouse` (
+ w_warehouse_sk Int64 NOT NULL,
+ w_warehouse_id Utf8,
+ w_warehouse_name Utf8,
+ w_warehouse_sq_ft Int64,
+ w_street_number Utf8,
+ w_street_name Utf8,
+ w_street_type Utf8,
+ w_suite_number Utf8,
+ w_city Utf8,
+ w_county Utf8,
+ w_state Utf8,
+ w_zip Utf8,
+ w_country Utf8,
+ w_gmt_offset Double,
+ PRIMARY KEY (w_warehouse_sk)
+)
+PARTITION BY HASH (w_warehouse_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/ship_mode` (
+ sm_ship_mode_sk Int64 NOT NULL,
+ sm_ship_mode_id Utf8,
+ sm_type Utf8,
+ sm_code Utf8,
+ sm_carrier Utf8,
+ sm_contract Utf8,
+ PRIMARY KEY (sm_ship_mode_sk)
+)
+PARTITION BY HASH (sm_ship_mode_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/time_dim` (
+ t_time_sk Int64 NOT NULL,
+ t_time_id Utf8,
+ t_time Int64,
+ t_hour Int64,
+ t_minute Int64,
+ t_second Int64,
+ t_am_pm Utf8,
+ t_shift Utf8,
+ t_sub_shift Utf8,
+ t_meal_time Utf8,
+ PRIMARY KEY (t_time_sk)
+)
+PARTITION BY HASH (t_time_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/reason` (
+ r_reason_sk Int64 NOT NULL,
+ r_reason_id Utf8,
+ r_reason_desc Utf8,
+ PRIMARY KEY (r_reason_sk)
+)
+PARTITION BY HASH (r_reason_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/income_band` (
+ ib_income_band_sk Int64 NOT NULL,
+ ib_lower_bound Int64,
+ ib_upper_bound Int64,
+ PRIMARY KEY (ib_income_band_sk)
+)
+PARTITION BY HASH (ib_income_band_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/item` (
+ i_item_sk Int64 NOT NULL,
+ i_item_id Utf8,
+ i_rec_start_date Date32,
+ i_rec_end_date Date32,
+ i_item_desc Utf8,
+ i_current_price Double,
+ i_wholesale_cost Double,
+ i_brand_id Int64,
+ i_brand Utf8,
+ i_class_id Int64,
+ i_class Utf8,
+ i_category_id Int64,
+ i_category Utf8,
+ i_manufact_id Int64,
+ i_manufact Utf8,
+ i_size Utf8,
+ i_formulation Utf8,
+ i_color Utf8,
+ i_units Utf8,
+ i_container Utf8,
+ i_manager_id Int64,
+ i_product_name Utf8,
+ PRIMARY KEY (i_item_sk)
+)
+PARTITION BY HASH (i_item_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/store` (
+ s_store_sk Int64 NOT NULL,
+ s_store_id Utf8,
+ s_rec_start_date Date32,
+ s_rec_end_date Date32,
+ s_closed_date_sk Int64,
+ s_store_name Utf8,
+ s_number_employees Int64,
+ s_floor_space Int64,
+ s_hours Utf8,
+ s_manager Utf8,
+ s_market_id Int64,
+ s_geography_class Utf8,
+ s_market_desc Utf8,
+ s_market_manager Utf8,
+ s_division_id Int64,
+ s_division_name Utf8,
+ s_company_id Int64,
+ s_company_name Utf8,
+ s_street_number Utf8,
+ s_street_name Utf8,
+ s_street_type Utf8,
+ s_suite_number Utf8,
+ s_city Utf8,
+ s_county Utf8,
+ s_state Utf8,
+ s_zip Utf8,
+ s_country Utf8,
+ s_gmt_offset Double,
+ s_tax_precentage Double,
+ PRIMARY KEY (s_store_sk)
+)
+PARTITION BY HASH (s_store_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/call_center` (
+ cc_call_center_sk Int64 NOT NULL,
+ cc_call_center_id Utf8,
+ cc_rec_start_date Date32,
+ cc_rec_end_date Date32,
+ cc_closed_date_sk Int64,
+ cc_open_date_sk Int64,
+ cc_name Utf8,
+ cc_class Utf8,
+ cc_employees Int64,
+ cc_sq_ft Int64,
+ cc_hours Utf8,
+ cc_manager Utf8,
+ cc_mkt_id Int64,
+ cc_mkt_class Utf8,
+ cc_mkt_desc Utf8,
+ cc_market_manager Utf8,
+ cc_division Int64,
+ cc_division_name Utf8,
+ cc_company Int64,
+ cc_company_name Utf8,
+ cc_street_number Utf8,
+ cc_street_name Utf8,
+ cc_street_type Utf8,
+ cc_suite_number Utf8,
+ cc_city Utf8,
+ cc_county Utf8,
+ cc_state Utf8,
+ cc_zip Utf8,
+ cc_country Utf8,
+ cc_gmt_offset Double,
+ cc_tax_percentage Double,
+ PRIMARY KEY (cc_call_center_sk)
+)
+PARTITION BY HASH (cc_call_center_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer` (
+ c_customer_sk Int64 NOT NULL,
+ c_customer_id Utf8,
+ c_current_cdemo_sk Int64,
+ c_current_hdemo_sk Int64,
+ c_current_addr_sk Int64,
+ c_first_shipto_date_sk Int64,
+ c_first_sales_date_sk Int64,
+ c_salutation Utf8,
+ c_first_name Utf8,
+ c_last_name Utf8,
+ c_preferred_cust_flag Utf8,
+ c_birth_day Int64,
+ c_birth_month Int64,
+ c_birth_year Int64,
+ c_birth_country Utf8,
+ c_login Utf8,
+ c_email_address Utf8,
+ c_last_review_date Utf8,
+ PRIMARY KEY (c_customer_sk)
+)
+PARTITION BY HASH (c_customer_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_site` (
+ web_site_sk Int64 NOT NULL,
+ web_site_id Utf8,
+ web_rec_start_date Date32,
+ web_rec_end_date Date32,
+ web_name Utf8,
+ web_open_date_sk Int64,
+ web_close_date_sk Int64,
+ web_class Utf8,
+ web_manager Utf8,
+ web_mkt_id Int64,
+ web_mkt_class Utf8,
+ web_mkt_desc Utf8,
+ web_market_manager Utf8,
+ web_company_id Int64,
+ web_company_name Utf8,
+ web_street_number Utf8,
+ web_street_name Utf8,
+ web_street_type Utf8,
+ web_suite_number Utf8,
+ web_city Utf8,
+ web_county Utf8,
+ web_state Utf8,
+ web_zip Utf8,
+ web_country Utf8,
+ web_gmt_offset Double,
+ web_tax_percentage Double,
+ PRIMARY KEY (web_site_sk)
+)
+PARTITION BY HASH (web_site_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/store_returns` (
+ sr_returned_date_sk Int64,
+ sr_return_time_sk Int64,
+ sr_item_sk Int64 NOT NULL,
+ sr_customer_sk Int64,
+ sr_cdemo_sk Int64,
+ sr_hdemo_sk Int64,
+ sr_addr_sk Int64,
+ sr_store_sk Int64,
+ sr_reason_sk Int64,
+ sr_ticket_number Int64 NOT NULL,
+ sr_return_quantity Int64,
+ sr_return_amt Double,
+ sr_return_tax Double,
+ sr_return_amt_inc_tax Double,
+ sr_fee Double,
+ sr_return_ship_cost Double,
+ sr_refunded_cash Double,
+ sr_reversed_charge Double,
+ sr_store_credit Double,
+ sr_net_loss Double,
+ PRIMARY KEY (sr_item_sk, sr_ticket_number)
+)
+PARTITION BY HASH (sr_item_sk, sr_ticket_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/household_demographics` (
+ hd_demo_sk Int64 NOT NULL,
+ hd_income_band_sk Int64,
+ hd_buy_potential Utf8,
+ hd_dep_count Int64,
+ hd_vehicle_count Int64,
+ PRIMARY KEY (hd_demo_sk)
+)
+PARTITION BY HASH (hd_demo_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_page` (
+ wp_web_page_sk Int64 NOT NULL,
+ wp_web_page_id Utf8,
+ wp_rec_start_date Date32,
+ wp_rec_end_date Date32,
+ wp_creation_date_sk Int64,
+ wp_access_date_sk Int64,
+ wp_autogen_flag Utf8,
+ wp_customer_sk Int64,
+ wp_url Utf8,
+ wp_type Utf8,
+ wp_char_count Int64,
+ wp_link_count Int64,
+ wp_image_count Int64,
+ wp_max_ad_count Int64,
+ PRIMARY KEY (wp_web_page_sk)
+)
+PARTITION BY HASH (wp_web_page_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/promotion` (
+ p_promo_sk Int64 NOT NULL,
+ p_promo_id Utf8,
+ p_start_date_sk Int64,
+ p_end_date_sk Int64,
+ p_item_sk Int64,
+ p_cost Double,
+ p_response_target Int64,
+ p_promo_name Utf8,
+ p_channel_dmail Utf8,
+ p_channel_email Utf8,
+ p_channel_catalog Utf8,
+ p_channel_tv Utf8,
+ p_channel_radio Utf8,
+ p_channel_press Utf8,
+ p_channel_event Utf8,
+ p_channel_demo Utf8,
+ p_channel_details Utf8,
+ p_purpose Utf8,
+ p_discount_active Utf8,
+ PRIMARY KEY (p_promo_sk)
+)
+PARTITION BY HASH (p_promo_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_page` (
+ cp_catalog_page_sk Int64 NOT NULL,
+ cp_catalog_page_id Utf8,
+ cp_start_date_sk Int64,
+ cp_end_date_sk Int64,
+ cp_department Utf8,
+ cp_catalog_number Int64,
+ cp_catalog_page_number Int64,
+ cp_description Utf8,
+ cp_type Utf8,
+ PRIMARY KEY (cp_catalog_page_sk)
+)
+PARTITION BY HASH (cp_catalog_page_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/inventory` (
+ inv_date_sk Int64 NOT NULL,
+ inv_item_sk Int64 NOT NULL,
+ inv_warehouse_sk Int64 NOT NULL,
+ inv_quantity_on_hand Int64,
+ PRIMARY KEY (inv_date_sk, inv_item_sk, inv_warehouse_sk)
+)
+PARTITION BY HASH (inv_date_sk, inv_item_sk, inv_warehouse_sk)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_returns` (
+ cr_returned_date_sk Int64,
+ cr_returned_time_sk Int64,
+ cr_item_sk Int64 NOT NULL,
+ cr_refunded_customer_sk Int64,
+ cr_refunded_cdemo_sk Int64,
+ cr_refunded_hdemo_sk Int64,
+ cr_refunded_addr_sk Int64,
+ cr_returning_customer_sk Int64,
+ cr_returning_cdemo_sk Int64,
+ cr_returning_hdemo_sk Int64,
+ cr_returning_addr_sk Int64,
+ cr_call_center_sk Int64,
+ cr_catalog_page_sk Int64,
+ cr_ship_mode_sk Int64,
+ cr_warehouse_sk Int64,
+ cr_reason_sk Int64,
+ cr_order_number Int64 NOT NULL,
+ cr_return_quantity Int64,
+ cr_return_amount Double,
+ cr_return_tax Double,
+ cr_return_amt_inc_tax Double,
+ cr_fee Double,
+ cr_return_ship_cost Double,
+ cr_refunded_cash Double,
+ cr_reversed_charge Double,
+ cr_store_credit Double,
+ cr_net_loss Double,
+ PRIMARY KEY (cr_item_sk, cr_order_number)
+)
+PARTITION BY HASH (cr_item_sk, cr_order_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_returns` (
+ wr_returned_date_sk Int64,
+ wr_returned_time_sk Int64,
+ wr_item_sk Int64 NOT NULL,
+ wr_refunded_customer_sk Int64,
+ wr_refunded_cdemo_sk Int64,
+ wr_refunded_hdemo_sk Int64,
+ wr_refunded_addr_sk Int64,
+ wr_returning_customer_sk Int64,
+ wr_returning_cdemo_sk Int64,
+ wr_returning_hdemo_sk Int64,
+ wr_returning_addr_sk Int64,
+ wr_web_page_sk Int64,
+ wr_reason_sk Int64,
+ wr_order_number Int64 NOT NULL,
+ wr_return_quantity Int64,
+ wr_return_amt Double,
+ wr_return_tax Double,
+ wr_return_amt_inc_tax Double,
+ wr_fee Double,
+ wr_return_ship_cost Double,
+ wr_refunded_cash Double,
+ wr_reversed_charge Double,
+ wr_account_credit Double,
+ wr_net_loss Double,
+ PRIMARY KEY (wr_item_sk, wr_order_number)
+)
+PARTITION BY HASH (wr_item_sk, wr_order_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_sales` (
+ ws_sold_date_sk Int64,
+ ws_sold_time_sk Int64,
+ ws_ship_date_sk Int64,
+ ws_item_sk Int64 NOT NULL,
+ ws_bill_customer_sk Int64,
+ ws_bill_cdemo_sk Int64,
+ ws_bill_hdemo_sk Int64,
+ ws_bill_addr_sk Int64,
+ ws_ship_customer_sk Int64,
+ ws_ship_cdemo_sk Int64,
+ ws_ship_hdemo_sk Int64,
+ ws_ship_addr_sk Int64,
+ ws_web_page_sk Int64,
+ ws_web_site_sk Int64,
+ ws_ship_mode_sk Int64,
+ ws_warehouse_sk Int64,
+ ws_promo_sk Int64,
+ ws_order_number Int64 NOT NULL,
+ ws_quantity Int64,
+ ws_wholesale_cost Double,
+ ws_list_price Double,
+ ws_sales_price Double,
+ ws_ext_discount_amt Double,
+ ws_ext_sales_price Double,
+ ws_ext_wholesale_cost Double,
+ ws_ext_list_price Double,
+ ws_ext_tax Double,
+ ws_coupon_amt Double,
+ ws_ext_ship_cost Double,
+ ws_net_paid Double,
+ ws_net_paid_inc_tax Double,
+ ws_net_paid_inc_ship Double,
+ ws_net_paid_inc_ship_tax Double,
+ ws_net_profit Double,
+ PRIMARY KEY (ws_item_sk, ws_order_number)
+)
+PARTITION BY HASH (ws_item_sk, ws_order_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_sales` (
+ cs_sold_date_sk Int64,
+ cs_sold_time_sk Int64,
+ cs_ship_date_sk Int64,
+ cs_bill_customer_sk Int64,
+ cs_bill_cdemo_sk Int64,
+ cs_bill_hdemo_sk Int64,
+ cs_bill_addr_sk Int64,
+ cs_ship_customer_sk Int64,
+ cs_ship_cdemo_sk Int64,
+ cs_ship_hdemo_sk Int64,
+ cs_ship_addr_sk Int64,
+ cs_call_center_sk Int64,
+ cs_catalog_page_sk Int64,
+ cs_ship_mode_sk Int64,
+ cs_warehouse_sk Int64,
+ cs_item_sk Int64 NOT NULL,
+ cs_promo_sk Int64,
+ cs_order_number Int64 NOT NULL,
+ cs_quantity Int64,
+ cs_wholesale_cost Double,
+ cs_list_price Double,
+ cs_sales_price Double,
+ cs_ext_discount_amt Double,
+ cs_ext_sales_price Double,
+ cs_ext_wholesale_cost Double,
+ cs_ext_list_price Double,
+ cs_ext_tax Double,
+ cs_coupon_amt Double,
+ cs_ext_ship_cost Double,
+ cs_net_paid Double,
+ cs_net_paid_inc_tax Double,
+ cs_net_paid_inc_ship Double,
+ cs_net_paid_inc_ship_tax Double,
+ cs_net_profit Double,
+ PRIMARY KEY (cs_item_sk, cs_order_number)
+)
+PARTITION BY HASH (cs_item_sk, cs_order_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+CREATE TABLE `/Root/db/Root/db/tpcds/s1/store_sales` (
+ ss_sold_date_sk Int64,
+ ss_sold_time_sk Int64,
+ ss_item_sk Int64 NOT NULL,
+ ss_customer_sk Int64,
+ ss_cdemo_sk Int64,
+ ss_hdemo_sk Int64,
+ ss_addr_sk Int64,
+ ss_store_sk Int64,
+ ss_promo_sk Int64,
+ ss_ticket_number Int64 NOT NULL,
+ ss_quantity Int64,
+ ss_wholesale_cost Double,
+ ss_list_price Double,
+ ss_sales_price Double,
+ ss_ext_discount_amt Double,
+ ss_ext_sales_price Double,
+ ss_ext_wholesale_cost Double,
+ ss_ext_list_price Double,
+ ss_ext_tax Double,
+ ss_coupon_amt Double,
+ ss_net_paid Double,
+ ss_net_paid_inc_tax Double,
+ ss_net_profit Double,
+ PRIMARY KEY (ss_item_sk, ss_ticket_number)
+)
+PARTITION BY HASH (ss_item_sk, ss_ticket_number)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
+);
+
+Init tables ...Ok
diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column
new file mode 100644
index 00000000000..47806ca50fa
--- /dev/null
+++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column
@@ -0,0 +1,137 @@
+Init tables ...
+--!syntax_v1
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/customer` (
+ c_acctbal Double NOT NULL,
+ c_address Utf8 NOT NULL,
+ c_comment Utf8 NOT NULL,
+ c_custkey Int64 NOT NULL,
+ c_mktsegment Utf8 NOT NULL,
+ c_name Utf8 NOT NULL,
+ c_nationkey Int32 NOT NULL,
+ c_phone Utf8 NOT NULL,
+ PRIMARY KEY (c_custkey)
+)
+PARTITION BY HASH (c_custkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` (
+ l_comment Utf8 NOT NULL,
+ l_commitdate Date32 NOT NULL,
+ l_discount Double NOT NULL,
+ l_extendedprice Double NOT NULL,
+ l_linenumber Int32 NOT NULL,
+ l_linestatus Utf8 NOT NULL,
+ l_orderkey Int64 NOT NULL,
+ l_partkey Int64 NOT NULL,
+ l_quantity Double NOT NULL,
+ l_receiptdate Date32 NOT NULL,
+ l_returnflag Utf8 NOT NULL,
+ l_shipdate Date32 NOT NULL,
+ l_shipinstruct Utf8 NOT NULL,
+ l_shipmode Utf8 NOT NULL,
+ l_suppkey Int64 NOT NULL,
+ l_tax Double NOT NULL,
+ PRIMARY KEY (l_orderkey, l_linenumber)
+)
+PARTITION BY HASH (l_orderkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/nation` (
+ n_comment Utf8 NOT NULL,
+ n_name Utf8 NOT NULL,
+ n_nationkey Int32 NOT NULL,
+ n_regionkey Int32 NOT NULL,
+ PRIMARY KEY (n_nationkey)
+)
+PARTITION BY HASH (n_nationkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/orders` (
+ o_clerk Utf8 NOT NULL,
+ o_comment Utf8 NOT NULL,
+ o_custkey Int64 NOT NULL,
+ o_orderdate Date32 NOT NULL,
+ o_orderkey Int64 NOT NULL,
+ o_orderpriority Utf8 NOT NULL,
+ o_orderstatus Utf8 NOT NULL,
+ o_shippriority Int32 NOT NULL,
+ o_totalprice Double NOT NULL,
+ PRIMARY KEY (o_orderkey)
+)
+PARTITION BY HASH (o_orderkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/part` (
+ p_brand Utf8 NOT NULL,
+ p_comment Utf8 NOT NULL,
+ p_container Utf8 NOT NULL,
+ p_mfgr Utf8 NOT NULL,
+ p_name Utf8 NOT NULL,
+ p_partkey Int64 NOT NULL,
+ p_retailprice Double NOT NULL,
+ p_size Int32 NOT NULL,
+ p_type Utf8 NOT NULL,
+ PRIMARY KEY (p_partkey)
+)
+PARTITION BY HASH (p_partkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/partsupp` (
+ ps_availqty Int32 NOT NULL,
+ ps_comment Utf8 NOT NULL,
+ ps_partkey Int64 NOT NULL,
+ ps_suppkey Int64 NOT NULL,
+ ps_supplycost Double NOT NULL,
+ PRIMARY KEY (ps_partkey, ps_suppkey)
+)
+PARTITION BY HASH (ps_partkey, ps_suppkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/region` (
+ r_comment Utf8 NOT NULL,
+ r_name Utf8 NOT NULL,
+ r_regionkey Int32 NOT NULL,
+ PRIMARY KEY (r_regionkey)
+)
+PARTITION BY HASH (r_regionkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1
+);
+
+CREATE TABLE `/Root/db/Root/db/tpch/s1/supplier` (
+ s_acctbal Double NOT NULL,
+ s_address Utf8 NOT NULL,
+ s_comment Utf8 NOT NULL,
+ s_name Utf8 NOT NULL,
+ s_nationkey Int32 NOT NULL,
+ s_phone Utf8 NOT NULL,
+ s_suppkey Int64 NOT NULL,
+ PRIMARY KEY (s_suppkey)
+)
+PARTITION BY HASH (s_suppkey)
+WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256
+);
+
+Init tables ...Ok
diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column
index 45e7e60ce35..098d7418a43 100644
--- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column
+++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column
@@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` (
l_tax Double NOT NULL,
PRIMARY KEY (l_orderkey, l_linenumber)
)
-PARTITION BY HASH (l_orderkey, l_linenumber)
+PARTITION BY HASH (l_orderkey)
WITH (
STORE = COLUMN,
AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal
index a87903fc168..fe6e09bd1a7 100644
--- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal
+++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal
@@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` (
l_tax Decimal(12,2) NOT NULL,
PRIMARY KEY (l_orderkey, l_linenumber)
)
-PARTITION BY HASH (l_orderkey, l_linenumber)
+PARTITION BY HASH (l_orderkey)
WITH (
STORE = COLUMN,
AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb
index 2ceafd6902d..a29220d4795 100644
--- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb
+++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb
@@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` (
l_tax Decimal(22,9) NOT NULL,
PRIMARY KEY (l_orderkey, l_linenumber)
)
-PARTITION BY HASH (l_orderkey, l_linenumber)
+PARTITION BY HASH (l_orderkey)
WITH (
STORE = COLUMN,
AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64
diff --git a/ydb/tests/functional/benchmarks_init/test_init.py b/ydb/tests/functional/benchmarks_init/test_init.py
index f20511ead93..5f9137e4da8 100644
--- a/ydb/tests/functional/benchmarks_init/test_init.py
+++ b/ydb/tests/functional/benchmarks_init/test_init.py
@@ -79,6 +79,9 @@ class TpcInitBase(InitBase):
def test_s1_column_decimal_ydb(self):
return self.canonical_result(self.execute_init(scale=1, args=['--store', 'column', '--float-mode', 'decimal_ydb']), self.tmp_path('s1_column_decimal_ydb'))
+ def test_s100_column(self):
+ return self.canonical_result(self.execute_init(scale=1, args=['--store', 'column', '--scale', '100']), self.tmp_path('s100_column'))
+
class TestTpchInit(TpcInitBase):
workload = 'tpch'