diff options
author | Олег <150132506+iddqdex@users.noreply.github.com> | 2025-04-18 15:41:40 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-18 12:41:40 +0000 |
commit | a0b13519c60db31ac0ec1bb377a07172f8a8b95a (patch) | |
tree | a0975dc6814e2f3bbfc45a7ac7792201e339523a | |
parent | 68c43aed9070c11917f3b1bb79f11197910d4a6a (diff) | |
download | ydb-a0b13519c60db31ac0ec1bb377a07172f8a8b95a.tar.gz |
change partitioning in tpch (#17416)
Co-authored-by: Nikolay Perfilov <pnv1@yandex-team.ru>
16 files changed, 828 insertions, 14 deletions
diff --git a/ydb/apps/ydb/CHANGELOG.md b/ydb/apps/ydb/CHANGELOG.md index 637e78e4e2b..8fe9b9c2cdd 100644 --- a/ydb/apps/ydb/CHANGELOG.md +++ b/ydb/apps/ydb/CHANGELOG.md @@ -1,3 +1,4 @@ +* Added `--scale` option to `ydb workload tpch init` and `ydb workload tpcds init` commands. Sets the percentage of the benchmark's data size and workload to use, relative to full scale. * Added "--no-discovery" option. It allows to skip discovery and use user provided endpoint to connect to YDB cluster. * Added `--retries` to `ydb workload <clickbenh|tpch|tpcds> run` command. * Added `--partition-size` param to `ydb workload <clickbench/tpcds/tpch> init`. diff --git a/ydb/library/workload/benchmark_base/workload.cpp b/ydb/library/workload/benchmark_base/workload.cpp index 7e5d09aaffe..cb413f4cca0 100644 --- a/ydb/library/workload/benchmark_base/workload.cpp +++ b/ydb/library/workload/benchmark_base/workload.cpp @@ -35,6 +35,22 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () { return settings.SerializeAsString(); } (); +namespace { + + TString KeysList(const NJson::TJsonValue& table, const TString& key) { + TVector<TStringBuf> keysV; + for (const auto& k: table[key].GetArray()) { + keysV.emplace_back(k.GetString()); + } + return JoinSeq(", ", keysV); + } + +} + +ui32 TWorkloadGeneratorBase::GetDefaultPartitionsCount(const TString& /*tableName*/) const { + return 64; +} + void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const { auto specialTypes = GetSpecialDataTypes(); specialTypes["string_type"] = Params.GetStringType(); @@ -65,11 +81,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ } } result << JoinSeq(",\n", columns); - TVector<TStringBuf> keysV; - for (const auto& k: table["primary_key"].GetArray()) { - keysV.emplace_back(k.GetString()); - } - const TString keys = JoinSeq(", ", keysV); + const auto keys = KeysList(table, "primary_key"); if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) { result << Endl; } else { @@ -78,7 +90,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ result << ")" << Endl; if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) { - result << "PARTITION BY HASH (" << keys << ")" << Endl; + result << "PARTITION BY HASH (" << (table.Has("partition_by") ? KeysList(table, "partition_by") : keys) << ")" << Endl; } result << "WITH (" << Endl; @@ -89,12 +101,12 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ break; case TWorkloadBaseParams::EStoreType::Column: result << " STORE = COLUMN," << Endl; - result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl; + result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl; break; case TWorkloadBaseParams::EStoreType::Row: result << " STORE = ROW," << Endl; result << " AUTO_PARTITIONING_PARTITION_SIZE_MB = " << Params.GetPartitionSizeMb() << ", " << Endl; - result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl; + result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl; } result << ");" << Endl; } diff --git a/ydb/library/workload/benchmark_base/workload.h b/ydb/library/workload/benchmark_base/workload.h index 52cb0c2774c..6ddf1cbd917 100644 --- a/ydb/library/workload/benchmark_base/workload.h +++ b/ydb/library/workload/benchmark_base/workload.h @@ -53,6 +53,7 @@ protected: virtual TString GetTablesYaml() const = 0; virtual TSpecialDataTypes GetSpecialDataTypes() const = 0; NJson::TJsonValue GetTablesJson() const; + virtual ui32 GetDefaultPartitionsCount(const TString& tableName) const; THolder<TGeneratorStateProcessor> StateProcessor; private: diff --git a/ydb/library/workload/tpc_base/tpc_base.cpp b/ydb/library/workload/tpc_base/tpc_base.cpp index 1a91da20851..0e02ee556ee 100644 --- a/ydb/library/workload/tpc_base/tpc_base.cpp +++ b/ydb/library/workload/tpc_base/tpc_base.cpp @@ -167,7 +167,7 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm .StoreResult(&ExternalQueriesDir); opts.AddLongOption( "syntax", "Query syntax [" + GetEnumAllNames<EQuerySyntax>() + "].") .StoreResult(&Syntax).DefaultValue(Syntax); - opts.AddLongOption("scale", "scale in percents") + opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.") .DefaultValue(Scale).StoreResult(&Scale); opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.") .StoreResult(&FloatMode).DefaultValue(FloatMode); @@ -177,6 +177,8 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm case TWorkloadParams::ECommandType::Init: opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.") .StoreResult(&FloatMode).DefaultValue(FloatMode); + opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.") + .DefaultValue(Scale).StoreResult(&Scale); break; default: break; diff --git a/ydb/library/workload/tpcds/data_generator.cpp b/ydb/library/workload/tpcds/data_generator.cpp index b15589ca550..495ba99f8d5 100644 --- a/ydb/library/workload/tpcds/data_generator.cpp +++ b/ydb/library/workload/tpcds/data_generator.cpp @@ -19,7 +19,7 @@ TTpcdsWorkloadDataInitializerGenerator::TTpcdsWorkloadDataInitializerGenerator(c void TTpcdsWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) { TWorkloadDataInitializerBase::ConfigureOpts(opts); - opts.AddLongOption("scale", "scale in percents") + opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.") .DefaultValue(Scale).StoreResult(&Scale); opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n" "Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys())) diff --git a/ydb/library/workload/tpch/data_generator.cpp b/ydb/library/workload/tpch/data_generator.cpp index 1fdbe46142b..24e2f74e8a9 100644 --- a/ydb/library/workload/tpch/data_generator.cpp +++ b/ydb/library/workload/tpch/data_generator.cpp @@ -11,7 +11,7 @@ TTpchWorkloadDataInitializerGenerator::TTpchWorkloadDataInitializerGenerator(con void TTpchWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) { TWorkloadDataInitializerBase::ConfigureOpts(opts); - opts.AddLongOption("scale", "scale in percents") + opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.") .DefaultValue(Scale).StoreResult(&Scale); opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n" "Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys())) diff --git a/ydb/library/workload/tpch/tpch.cpp b/ydb/library/workload/tpch/tpch.cpp index 183f3a0e7b1..13de9152902 100644 --- a/ydb/library/workload/tpch/tpch.cpp +++ b/ydb/library/workload/tpch/tpch.cpp @@ -29,6 +29,10 @@ TWorkloadGeneratorBase::TSpecialDataTypes TTpchWorkloadGenerator::GetSpecialData } } +ui32 TTpchWorkloadGenerator::GetDefaultPartitionsCount(const TString& /*tableName*/) const { + return Params.GetScale() <= 10 ? 64 : 256; +} + THolder<IWorkloadQueryGenerator> TTpchWorkloadParams::CreateGenerator() const { return MakeHolder<TTpchWorkloadGenerator>(*this); diff --git a/ydb/library/workload/tpch/tpch.h b/ydb/library/workload/tpch/tpch.h index bfe5506c493..7e801ba73ec 100644 --- a/ydb/library/workload/tpch/tpch.h +++ b/ydb/library/workload/tpch/tpch.h @@ -19,6 +19,7 @@ public: protected: TString GetTablesYaml() const override; TWorkloadGeneratorBase::TSpecialDataTypes GetSpecialDataTypes() const override; + ui32 GetDefaultPartitionsCount(const TString& tableName) const override; private: const TTpchWorkloadParams& Params; diff --git a/ydb/library/workload/tpch/tpch_schema.yaml b/ydb/library/workload/tpch/tpch_schema.yaml index 4041d510183..2bcfd8cdcb3 100644 --- a/ydb/library/workload/tpch/tpch_schema.yaml +++ b/ydb/library/workload/tpch/tpch_schema.yaml @@ -81,6 +81,8 @@ tables: primary_key: - l_orderkey - l_linenumber + partition_by: + - l_orderkey - name: nation columns: diff --git a/ydb/tests/functional/benchmarks_init/canondata/result.json b/ydb/tests/functional/benchmarks_init/canondata/result.json index 4ae82132a23..ff54afb3c35 100644 --- a/ydb/tests/functional/benchmarks_init/canondata/result.json +++ b/ydb/tests/functional/benchmarks_init/canondata/result.json @@ -32,6 +32,9 @@ "test_init.TestClickbenchInit.test_s1_s3": { "uri": "file://test_init.TestClickbenchInit.test_s1_s3/s1_s3" }, + "test_init.TestTpcdsInit.test_s100_column": { + "uri": "file://test_init.TestTpcdsInit.test_s100_column/s100_column" + }, "test_init.TestTpcdsInit.test_s1_column": { "uri": "file://test_init.TestTpcdsInit.test_s1_column/s1_column" }, @@ -47,6 +50,9 @@ "test_init.TestTpcdsInit.test_s1_s3": { "uri": "file://test_init.TestTpcdsInit.test_s1_s3/s1_s3" }, + "test_init.TestTpchInit.test_s100_column": { + "uri": "file://test_init.TestTpchInit.test_s100_column/s100_column" + }, "test_init.TestTpchInit.test_s1_column": { "uri": "file://test_init.TestTpchInit.test_s1_column/s1_column" }, diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column new file mode 100644 index 00000000000..158ec1509f1 --- /dev/null +++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpcdsInit.test_s100_column/s100_column @@ -0,0 +1,645 @@ +Init tables ... +--!syntax_v1 + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer_address` ( + ca_address_sk Int64 NOT NULL, + ca_address_id Utf8, + ca_street_number Utf8, + ca_street_name Utf8, + ca_street_type Utf8, + ca_suite_number Utf8, + ca_city Utf8, + ca_county Utf8, + ca_state Utf8, + ca_zip Utf8, + ca_country Utf8, + ca_gmt_offset Double, + ca_location_type Utf8, + PRIMARY KEY (ca_address_sk) +) +PARTITION BY HASH (ca_address_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer_demographics` ( + cd_demo_sk Int64 NOT NULL, + cd_gender Utf8, + cd_marital_status Utf8, + cd_education_status Utf8, + cd_purchase_estimate Int64, + cd_credit_rating Utf8, + cd_dep_count Int64, + cd_dep_employed_count Int64, + cd_dep_college_count Int64, + PRIMARY KEY (cd_demo_sk) +) +PARTITION BY HASH (cd_demo_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/date_dim` ( + d_date_sk Int64 NOT NULL, + d_date_id Utf8, + d_date Utf8, + d_month_seq Int64, + d_week_seq Int64, + d_quarter_seq Int64, + d_year Int64, + d_dow Int64, + d_moy Int64, + d_dom Int64, + d_qoy Int64, + d_fy_year Int64, + d_fy_quarter_seq Int64, + d_fy_week_seq Int64, + d_day_name Utf8, + d_quarter_name Utf8, + d_holiday Utf8, + d_weekend Utf8, + d_following_holiday Utf8, + d_first_dom Int64, + d_last_dom Int64, + d_same_day_ly Int64, + d_same_day_lq Int64, + d_current_day Utf8, + d_current_week Utf8, + d_current_month Utf8, + d_current_quarter Utf8, + d_current_year Utf8, + PRIMARY KEY (d_date_sk) +) +PARTITION BY HASH (d_date_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/warehouse` ( + w_warehouse_sk Int64 NOT NULL, + w_warehouse_id Utf8, + w_warehouse_name Utf8, + w_warehouse_sq_ft Int64, + w_street_number Utf8, + w_street_name Utf8, + w_street_type Utf8, + w_suite_number Utf8, + w_city Utf8, + w_county Utf8, + w_state Utf8, + w_zip Utf8, + w_country Utf8, + w_gmt_offset Double, + PRIMARY KEY (w_warehouse_sk) +) +PARTITION BY HASH (w_warehouse_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/ship_mode` ( + sm_ship_mode_sk Int64 NOT NULL, + sm_ship_mode_id Utf8, + sm_type Utf8, + sm_code Utf8, + sm_carrier Utf8, + sm_contract Utf8, + PRIMARY KEY (sm_ship_mode_sk) +) +PARTITION BY HASH (sm_ship_mode_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/time_dim` ( + t_time_sk Int64 NOT NULL, + t_time_id Utf8, + t_time Int64, + t_hour Int64, + t_minute Int64, + t_second Int64, + t_am_pm Utf8, + t_shift Utf8, + t_sub_shift Utf8, + t_meal_time Utf8, + PRIMARY KEY (t_time_sk) +) +PARTITION BY HASH (t_time_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/reason` ( + r_reason_sk Int64 NOT NULL, + r_reason_id Utf8, + r_reason_desc Utf8, + PRIMARY KEY (r_reason_sk) +) +PARTITION BY HASH (r_reason_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/income_band` ( + ib_income_band_sk Int64 NOT NULL, + ib_lower_bound Int64, + ib_upper_bound Int64, + PRIMARY KEY (ib_income_band_sk) +) +PARTITION BY HASH (ib_income_band_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/item` ( + i_item_sk Int64 NOT NULL, + i_item_id Utf8, + i_rec_start_date Date32, + i_rec_end_date Date32, + i_item_desc Utf8, + i_current_price Double, + i_wholesale_cost Double, + i_brand_id Int64, + i_brand Utf8, + i_class_id Int64, + i_class Utf8, + i_category_id Int64, + i_category Utf8, + i_manufact_id Int64, + i_manufact Utf8, + i_size Utf8, + i_formulation Utf8, + i_color Utf8, + i_units Utf8, + i_container Utf8, + i_manager_id Int64, + i_product_name Utf8, + PRIMARY KEY (i_item_sk) +) +PARTITION BY HASH (i_item_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/store` ( + s_store_sk Int64 NOT NULL, + s_store_id Utf8, + s_rec_start_date Date32, + s_rec_end_date Date32, + s_closed_date_sk Int64, + s_store_name Utf8, + s_number_employees Int64, + s_floor_space Int64, + s_hours Utf8, + s_manager Utf8, + s_market_id Int64, + s_geography_class Utf8, + s_market_desc Utf8, + s_market_manager Utf8, + s_division_id Int64, + s_division_name Utf8, + s_company_id Int64, + s_company_name Utf8, + s_street_number Utf8, + s_street_name Utf8, + s_street_type Utf8, + s_suite_number Utf8, + s_city Utf8, + s_county Utf8, + s_state Utf8, + s_zip Utf8, + s_country Utf8, + s_gmt_offset Double, + s_tax_precentage Double, + PRIMARY KEY (s_store_sk) +) +PARTITION BY HASH (s_store_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/call_center` ( + cc_call_center_sk Int64 NOT NULL, + cc_call_center_id Utf8, + cc_rec_start_date Date32, + cc_rec_end_date Date32, + cc_closed_date_sk Int64, + cc_open_date_sk Int64, + cc_name Utf8, + cc_class Utf8, + cc_employees Int64, + cc_sq_ft Int64, + cc_hours Utf8, + cc_manager Utf8, + cc_mkt_id Int64, + cc_mkt_class Utf8, + cc_mkt_desc Utf8, + cc_market_manager Utf8, + cc_division Int64, + cc_division_name Utf8, + cc_company Int64, + cc_company_name Utf8, + cc_street_number Utf8, + cc_street_name Utf8, + cc_street_type Utf8, + cc_suite_number Utf8, + cc_city Utf8, + cc_county Utf8, + cc_state Utf8, + cc_zip Utf8, + cc_country Utf8, + cc_gmt_offset Double, + cc_tax_percentage Double, + PRIMARY KEY (cc_call_center_sk) +) +PARTITION BY HASH (cc_call_center_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/customer` ( + c_customer_sk Int64 NOT NULL, + c_customer_id Utf8, + c_current_cdemo_sk Int64, + c_current_hdemo_sk Int64, + c_current_addr_sk Int64, + c_first_shipto_date_sk Int64, + c_first_sales_date_sk Int64, + c_salutation Utf8, + c_first_name Utf8, + c_last_name Utf8, + c_preferred_cust_flag Utf8, + c_birth_day Int64, + c_birth_month Int64, + c_birth_year Int64, + c_birth_country Utf8, + c_login Utf8, + c_email_address Utf8, + c_last_review_date Utf8, + PRIMARY KEY (c_customer_sk) +) +PARTITION BY HASH (c_customer_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_site` ( + web_site_sk Int64 NOT NULL, + web_site_id Utf8, + web_rec_start_date Date32, + web_rec_end_date Date32, + web_name Utf8, + web_open_date_sk Int64, + web_close_date_sk Int64, + web_class Utf8, + web_manager Utf8, + web_mkt_id Int64, + web_mkt_class Utf8, + web_mkt_desc Utf8, + web_market_manager Utf8, + web_company_id Int64, + web_company_name Utf8, + web_street_number Utf8, + web_street_name Utf8, + web_street_type Utf8, + web_suite_number Utf8, + web_city Utf8, + web_county Utf8, + web_state Utf8, + web_zip Utf8, + web_country Utf8, + web_gmt_offset Double, + web_tax_percentage Double, + PRIMARY KEY (web_site_sk) +) +PARTITION BY HASH (web_site_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/store_returns` ( + sr_returned_date_sk Int64, + sr_return_time_sk Int64, + sr_item_sk Int64 NOT NULL, + sr_customer_sk Int64, + sr_cdemo_sk Int64, + sr_hdemo_sk Int64, + sr_addr_sk Int64, + sr_store_sk Int64, + sr_reason_sk Int64, + sr_ticket_number Int64 NOT NULL, + sr_return_quantity Int64, + sr_return_amt Double, + sr_return_tax Double, + sr_return_amt_inc_tax Double, + sr_fee Double, + sr_return_ship_cost Double, + sr_refunded_cash Double, + sr_reversed_charge Double, + sr_store_credit Double, + sr_net_loss Double, + PRIMARY KEY (sr_item_sk, sr_ticket_number) +) +PARTITION BY HASH (sr_item_sk, sr_ticket_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/household_demographics` ( + hd_demo_sk Int64 NOT NULL, + hd_income_band_sk Int64, + hd_buy_potential Utf8, + hd_dep_count Int64, + hd_vehicle_count Int64, + PRIMARY KEY (hd_demo_sk) +) +PARTITION BY HASH (hd_demo_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_page` ( + wp_web_page_sk Int64 NOT NULL, + wp_web_page_id Utf8, + wp_rec_start_date Date32, + wp_rec_end_date Date32, + wp_creation_date_sk Int64, + wp_access_date_sk Int64, + wp_autogen_flag Utf8, + wp_customer_sk Int64, + wp_url Utf8, + wp_type Utf8, + wp_char_count Int64, + wp_link_count Int64, + wp_image_count Int64, + wp_max_ad_count Int64, + PRIMARY KEY (wp_web_page_sk) +) +PARTITION BY HASH (wp_web_page_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/promotion` ( + p_promo_sk Int64 NOT NULL, + p_promo_id Utf8, + p_start_date_sk Int64, + p_end_date_sk Int64, + p_item_sk Int64, + p_cost Double, + p_response_target Int64, + p_promo_name Utf8, + p_channel_dmail Utf8, + p_channel_email Utf8, + p_channel_catalog Utf8, + p_channel_tv Utf8, + p_channel_radio Utf8, + p_channel_press Utf8, + p_channel_event Utf8, + p_channel_demo Utf8, + p_channel_details Utf8, + p_purpose Utf8, + p_discount_active Utf8, + PRIMARY KEY (p_promo_sk) +) +PARTITION BY HASH (p_promo_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_page` ( + cp_catalog_page_sk Int64 NOT NULL, + cp_catalog_page_id Utf8, + cp_start_date_sk Int64, + cp_end_date_sk Int64, + cp_department Utf8, + cp_catalog_number Int64, + cp_catalog_page_number Int64, + cp_description Utf8, + cp_type Utf8, + PRIMARY KEY (cp_catalog_page_sk) +) +PARTITION BY HASH (cp_catalog_page_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/inventory` ( + inv_date_sk Int64 NOT NULL, + inv_item_sk Int64 NOT NULL, + inv_warehouse_sk Int64 NOT NULL, + inv_quantity_on_hand Int64, + PRIMARY KEY (inv_date_sk, inv_item_sk, inv_warehouse_sk) +) +PARTITION BY HASH (inv_date_sk, inv_item_sk, inv_warehouse_sk) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_returns` ( + cr_returned_date_sk Int64, + cr_returned_time_sk Int64, + cr_item_sk Int64 NOT NULL, + cr_refunded_customer_sk Int64, + cr_refunded_cdemo_sk Int64, + cr_refunded_hdemo_sk Int64, + cr_refunded_addr_sk Int64, + cr_returning_customer_sk Int64, + cr_returning_cdemo_sk Int64, + cr_returning_hdemo_sk Int64, + cr_returning_addr_sk Int64, + cr_call_center_sk Int64, + cr_catalog_page_sk Int64, + cr_ship_mode_sk Int64, + cr_warehouse_sk Int64, + cr_reason_sk Int64, + cr_order_number Int64 NOT NULL, + cr_return_quantity Int64, + cr_return_amount Double, + cr_return_tax Double, + cr_return_amt_inc_tax Double, + cr_fee Double, + cr_return_ship_cost Double, + cr_refunded_cash Double, + cr_reversed_charge Double, + cr_store_credit Double, + cr_net_loss Double, + PRIMARY KEY (cr_item_sk, cr_order_number) +) +PARTITION BY HASH (cr_item_sk, cr_order_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_returns` ( + wr_returned_date_sk Int64, + wr_returned_time_sk Int64, + wr_item_sk Int64 NOT NULL, + wr_refunded_customer_sk Int64, + wr_refunded_cdemo_sk Int64, + wr_refunded_hdemo_sk Int64, + wr_refunded_addr_sk Int64, + wr_returning_customer_sk Int64, + wr_returning_cdemo_sk Int64, + wr_returning_hdemo_sk Int64, + wr_returning_addr_sk Int64, + wr_web_page_sk Int64, + wr_reason_sk Int64, + wr_order_number Int64 NOT NULL, + wr_return_quantity Int64, + wr_return_amt Double, + wr_return_tax Double, + wr_return_amt_inc_tax Double, + wr_fee Double, + wr_return_ship_cost Double, + wr_refunded_cash Double, + wr_reversed_charge Double, + wr_account_credit Double, + wr_net_loss Double, + PRIMARY KEY (wr_item_sk, wr_order_number) +) +PARTITION BY HASH (wr_item_sk, wr_order_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/web_sales` ( + ws_sold_date_sk Int64, + ws_sold_time_sk Int64, + ws_ship_date_sk Int64, + ws_item_sk Int64 NOT NULL, + ws_bill_customer_sk Int64, + ws_bill_cdemo_sk Int64, + ws_bill_hdemo_sk Int64, + ws_bill_addr_sk Int64, + ws_ship_customer_sk Int64, + ws_ship_cdemo_sk Int64, + ws_ship_hdemo_sk Int64, + ws_ship_addr_sk Int64, + ws_web_page_sk Int64, + ws_web_site_sk Int64, + ws_ship_mode_sk Int64, + ws_warehouse_sk Int64, + ws_promo_sk Int64, + ws_order_number Int64 NOT NULL, + ws_quantity Int64, + ws_wholesale_cost Double, + ws_list_price Double, + ws_sales_price Double, + ws_ext_discount_amt Double, + ws_ext_sales_price Double, + ws_ext_wholesale_cost Double, + ws_ext_list_price Double, + ws_ext_tax Double, + ws_coupon_amt Double, + ws_ext_ship_cost Double, + ws_net_paid Double, + ws_net_paid_inc_tax Double, + ws_net_paid_inc_ship Double, + ws_net_paid_inc_ship_tax Double, + ws_net_profit Double, + PRIMARY KEY (ws_item_sk, ws_order_number) +) +PARTITION BY HASH (ws_item_sk, ws_order_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/catalog_sales` ( + cs_sold_date_sk Int64, + cs_sold_time_sk Int64, + cs_ship_date_sk Int64, + cs_bill_customer_sk Int64, + cs_bill_cdemo_sk Int64, + cs_bill_hdemo_sk Int64, + cs_bill_addr_sk Int64, + cs_ship_customer_sk Int64, + cs_ship_cdemo_sk Int64, + cs_ship_hdemo_sk Int64, + cs_ship_addr_sk Int64, + cs_call_center_sk Int64, + cs_catalog_page_sk Int64, + cs_ship_mode_sk Int64, + cs_warehouse_sk Int64, + cs_item_sk Int64 NOT NULL, + cs_promo_sk Int64, + cs_order_number Int64 NOT NULL, + cs_quantity Int64, + cs_wholesale_cost Double, + cs_list_price Double, + cs_sales_price Double, + cs_ext_discount_amt Double, + cs_ext_sales_price Double, + cs_ext_wholesale_cost Double, + cs_ext_list_price Double, + cs_ext_tax Double, + cs_coupon_amt Double, + cs_ext_ship_cost Double, + cs_net_paid Double, + cs_net_paid_inc_tax Double, + cs_net_paid_inc_ship Double, + cs_net_paid_inc_ship_tax Double, + cs_net_profit Double, + PRIMARY KEY (cs_item_sk, cs_order_number) +) +PARTITION BY HASH (cs_item_sk, cs_order_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +CREATE TABLE `/Root/db/Root/db/tpcds/s1/store_sales` ( + ss_sold_date_sk Int64, + ss_sold_time_sk Int64, + ss_item_sk Int64 NOT NULL, + ss_customer_sk Int64, + ss_cdemo_sk Int64, + ss_hdemo_sk Int64, + ss_addr_sk Int64, + ss_store_sk Int64, + ss_promo_sk Int64, + ss_ticket_number Int64 NOT NULL, + ss_quantity Int64, + ss_wholesale_cost Double, + ss_list_price Double, + ss_sales_price Double, + ss_ext_discount_amt Double, + ss_ext_sales_price Double, + ss_ext_wholesale_cost Double, + ss_ext_list_price Double, + ss_ext_tax Double, + ss_coupon_amt Double, + ss_net_paid Double, + ss_net_paid_inc_tax Double, + ss_net_profit Double, + PRIMARY KEY (ss_item_sk, ss_ticket_number) +) +PARTITION BY HASH (ss_item_sk, ss_ticket_number) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 +); + +Init tables ...Ok diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column new file mode 100644 index 00000000000..47806ca50fa --- /dev/null +++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s100_column/s100_column @@ -0,0 +1,137 @@ +Init tables ... +--!syntax_v1 + +CREATE TABLE `/Root/db/Root/db/tpch/s1/customer` ( + c_acctbal Double NOT NULL, + c_address Utf8 NOT NULL, + c_comment Utf8 NOT NULL, + c_custkey Int64 NOT NULL, + c_mktsegment Utf8 NOT NULL, + c_name Utf8 NOT NULL, + c_nationkey Int32 NOT NULL, + c_phone Utf8 NOT NULL, + PRIMARY KEY (c_custkey) +) +PARTITION BY HASH (c_custkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` ( + l_comment Utf8 NOT NULL, + l_commitdate Date32 NOT NULL, + l_discount Double NOT NULL, + l_extendedprice Double NOT NULL, + l_linenumber Int32 NOT NULL, + l_linestatus Utf8 NOT NULL, + l_orderkey Int64 NOT NULL, + l_partkey Int64 NOT NULL, + l_quantity Double NOT NULL, + l_receiptdate Date32 NOT NULL, + l_returnflag Utf8 NOT NULL, + l_shipdate Date32 NOT NULL, + l_shipinstruct Utf8 NOT NULL, + l_shipmode Utf8 NOT NULL, + l_suppkey Int64 NOT NULL, + l_tax Double NOT NULL, + PRIMARY KEY (l_orderkey, l_linenumber) +) +PARTITION BY HASH (l_orderkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/nation` ( + n_comment Utf8 NOT NULL, + n_name Utf8 NOT NULL, + n_nationkey Int32 NOT NULL, + n_regionkey Int32 NOT NULL, + PRIMARY KEY (n_nationkey) +) +PARTITION BY HASH (n_nationkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/orders` ( + o_clerk Utf8 NOT NULL, + o_comment Utf8 NOT NULL, + o_custkey Int64 NOT NULL, + o_orderdate Date32 NOT NULL, + o_orderkey Int64 NOT NULL, + o_orderpriority Utf8 NOT NULL, + o_orderstatus Utf8 NOT NULL, + o_shippriority Int32 NOT NULL, + o_totalprice Double NOT NULL, + PRIMARY KEY (o_orderkey) +) +PARTITION BY HASH (o_orderkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/part` ( + p_brand Utf8 NOT NULL, + p_comment Utf8 NOT NULL, + p_container Utf8 NOT NULL, + p_mfgr Utf8 NOT NULL, + p_name Utf8 NOT NULL, + p_partkey Int64 NOT NULL, + p_retailprice Double NOT NULL, + p_size Int32 NOT NULL, + p_type Utf8 NOT NULL, + PRIMARY KEY (p_partkey) +) +PARTITION BY HASH (p_partkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/partsupp` ( + ps_availqty Int32 NOT NULL, + ps_comment Utf8 NOT NULL, + ps_partkey Int64 NOT NULL, + ps_suppkey Int64 NOT NULL, + ps_supplycost Double NOT NULL, + PRIMARY KEY (ps_partkey, ps_suppkey) +) +PARTITION BY HASH (ps_partkey, ps_suppkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/region` ( + r_comment Utf8 NOT NULL, + r_name Utf8 NOT NULL, + r_regionkey Int32 NOT NULL, + PRIMARY KEY (r_regionkey) +) +PARTITION BY HASH (r_regionkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 +); + +CREATE TABLE `/Root/db/Root/db/tpch/s1/supplier` ( + s_acctbal Double NOT NULL, + s_address Utf8 NOT NULL, + s_comment Utf8 NOT NULL, + s_name Utf8 NOT NULL, + s_nationkey Int32 NOT NULL, + s_phone Utf8 NOT NULL, + s_suppkey Int64 NOT NULL, + PRIMARY KEY (s_suppkey) +) +PARTITION BY HASH (s_suppkey) +WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 256 +); + +Init tables ...Ok diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column index 45e7e60ce35..098d7418a43 100644 --- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column +++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column/s1_column @@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` ( l_tax Double NOT NULL, PRIMARY KEY (l_orderkey, l_linenumber) ) -PARTITION BY HASH (l_orderkey, l_linenumber) +PARTITION BY HASH (l_orderkey) WITH ( STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal index a87903fc168..fe6e09bd1a7 100644 --- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal +++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal/s1_column_decimal @@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` ( l_tax Decimal(12,2) NOT NULL, PRIMARY KEY (l_orderkey, l_linenumber) ) -PARTITION BY HASH (l_orderkey, l_linenumber) +PARTITION BY HASH (l_orderkey) WITH ( STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 diff --git a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb index 2ceafd6902d..a29220d4795 100644 --- a/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb +++ b/ydb/tests/functional/benchmarks_init/canondata/test_init.TestTpchInit.test_s1_column_decimal_ydb/s1_column_decimal_ydb @@ -37,7 +37,7 @@ CREATE TABLE `/Root/db/Root/db/tpch/s1/lineitem` ( l_tax Decimal(22,9) NOT NULL, PRIMARY KEY (l_orderkey, l_linenumber) ) -PARTITION BY HASH (l_orderkey, l_linenumber) +PARTITION BY HASH (l_orderkey) WITH ( STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 64 diff --git a/ydb/tests/functional/benchmarks_init/test_init.py b/ydb/tests/functional/benchmarks_init/test_init.py index f20511ead93..5f9137e4da8 100644 --- a/ydb/tests/functional/benchmarks_init/test_init.py +++ b/ydb/tests/functional/benchmarks_init/test_init.py @@ -79,6 +79,9 @@ class TpcInitBase(InitBase): def test_s1_column_decimal_ydb(self): return self.canonical_result(self.execute_init(scale=1, args=['--store', 'column', '--float-mode', 'decimal_ydb']), self.tmp_path('s1_column_decimal_ydb')) + def test_s100_column(self): + return self.canonical_result(self.execute_init(scale=1, args=['--store', 'column', '--scale', '100']), self.tmp_path('s100_column')) + class TestTpchInit(TpcInitBase): workload = 'tpch' |