summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkungurtsev <[email protected]>2025-09-19 12:28:22 +0200
committerkungasc <[email protected]>2025-09-23 11:56:20 +0300
commit123b6376ce90c698f9c60e65f6b9639c5f03636c (patch)
tree3a2951b8fbd6b1c67f9e3770dd48f7b70484f3a8
parentee3cb799508e266feaecb150d9baf075dd586714 (diff)
DataShard fulltext index build scan (#25028)
-rw-r--r--ydb/core/base/fulltext.cpp42
-rw-r--r--ydb/core/base/fulltext.h4
-rw-r--r--ydb/core/base/table_index.h1
-rw-r--r--ydb/core/base/ut/fulltext_ut.cpp42
-rw-r--r--ydb/core/protos/tx_datashard.proto37
-rw-r--r--ydb/core/tx/datashard/build_index/fulltext.cpp415
-rw-r--r--ydb/core/tx/datashard/build_index/secondary_index.cpp13
-rw-r--r--ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp367
-rw-r--r--ydb/core/tx/datashard/build_index/ut/ya.make1
-rw-r--r--ydb/core/tx/datashard/datashard.h15
-rw-r--r--ydb/core/tx/datashard/datashard_impl.h4
-rw-r--r--ydb/core/tx/datashard/ya.make7
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp2
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp2
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_utils.h32
-rw-r--r--ydb/library/services/services.proto1
16 files changed, 929 insertions, 56 deletions
diff --git a/ydb/core/base/fulltext.cpp b/ydb/core/base/fulltext.cpp
index a23e83f2955..51ced489aff 100644
--- a/ydb/core/base/fulltext.cpp
+++ b/ydb/core/base/fulltext.cpp
@@ -137,27 +137,40 @@ TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSet
return tokens;
}
-bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
+bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) {
error = "layout should be set";
return false;
}
- if (settings.columns().size() != 1) {
- error = TStringBuilder() << "fulltext index should have single column settings"
- << " but have " << settings.columns().size() << " of them";
- return false;
- }
-
for (auto column : settings.columns()) {
if (!column.has_column()) {
- error = "column should be set";
+ error = "fulltext index settings should have a column name";
return false;
}
if (!column.has_analyzers()) {
- error = "column analyzers should be set";
+ error = "fulltext index settings should have analyzers";
return false;
}
+ }
+
+ if (keyColumns.size() != 1) {
+ error = TStringBuilder() << "fulltext index should have a single text key column"
+ << " but have " << keyColumns.size() << " of them";
+ return false;
+ }
+ if (settings.columns().size() != 1) {
+ error = TStringBuilder() << "fulltext index should have a single text key column settings"
+ << " but have " << settings.columns().size() << " of them";
+ return false;
+ }
+ if (keyColumns.at(0) != settings.columns().at(0).column()) {
+ error = TStringBuilder() << "fulltext index should have a single text key column " << keyColumns.at(0) << " settings"
+ << " but have " << settings.columns().at(0).column();
+ return false;
+ }
+
+ for (auto column : settings.columns()) {
if (!ValidateSettings(column.analyzers(), error)) {
return false;
}
@@ -167,7 +180,7 @@ bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString
return true;
}
-Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& settings, TString& error) {
+Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& settings, TString& error) {
Ydb::Table::FulltextIndexSettings result;
Ydb::Table::FulltextIndexSettings::Analyzers resultAnalyzers;
@@ -209,11 +222,16 @@ Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVec
{
// only single-columned index is supported for now
auto columnAnalyzers = result.add_columns();
- columnAnalyzers->set_column(column);
+ columnAnalyzers->set_column(keyColumn);
columnAnalyzers->mutable_analyzers()->CopyFrom(resultAnalyzers);
}
- ValidateSettings(result, error);
+ {
+ NProtoBuf::RepeatedPtrField<TString> keyColumns;
+ TString keyColumn_ = keyColumn;
+ keyColumns.Add(std::move(keyColumn_));
+ ValidateSettings(keyColumns, result, error);
+ }
return result;
}
diff --git a/ydb/core/base/fulltext.h b/ydb/core/base/fulltext.h
index b65e3043622..849b52bfc9e 100644
--- a/ydb/core/base/fulltext.h
+++ b/ydb/core/base/fulltext.h
@@ -8,7 +8,7 @@ namespace NKikimr::NFulltext {
TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings);
-bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error);
-Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& values, TString& error);
+bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error);
+Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& values, TString& error);
}
diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h
index 201e40ae99f..a251122d0b5 100644
--- a/ydb/core/base/table_index.h
+++ b/ydb/core/base/table_index.h
@@ -83,6 +83,7 @@ TClusterId SetPostingParentFlag(TClusterId parent);
namespace NFulltext {
// TODO: support utf-8 in fulltext index
+ inline constexpr auto TokenType = Ydb::Type::STRING;
inline constexpr const char* TokenTypeName = "String";
inline constexpr const char* TokenColumn = "__ydb_token";
diff --git a/ydb/core/base/ut/fulltext_ut.cpp b/ydb/core/base/ut/fulltext_ut.cpp
index 0824748fe0d..d3cb620dca9 100644
--- a/ydb/core/base/ut/fulltext_ut.cpp
+++ b/ydb/core/base/ut/fulltext_ut.cpp
@@ -10,27 +10,51 @@ Y_UNIT_TEST_SUITE(NFulltext) {
Ydb::Table::FulltextIndexSettings settings;
TString error;
- UNIT_ASSERT(!ValidateSettings(settings, error));
+ NProtoBuf::RepeatedPtrField<TString> keyColumns;
+ keyColumns.Add("text");
+
+ UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "layout should be set");
settings.set_layout(Ydb::Table::FulltextIndexSettings::FLAT);
- UNIT_ASSERT(!ValidateSettings(settings, error));
- UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have single column settings but have 0 of them");
+ UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column settings but have 0 of them");
auto columnSettings = settings.add_columns();
- UNIT_ASSERT(!ValidateSettings(settings, error));
- UNIT_ASSERT_VALUES_EQUAL(error, "column should be set");
+ UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have a column name");
columnSettings->set_column("text");
- UNIT_ASSERT(!ValidateSettings(settings, error));
- UNIT_ASSERT_VALUES_EQUAL(error, "column analyzers should be set");
+ UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have analyzers");
auto columnAnalyzers = columnSettings->mutable_analyzers();
- UNIT_ASSERT(!ValidateSettings(settings, error));
+ UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "tokenizer should be set");
columnAnalyzers->set_tokenizer(Ydb::Table::FulltextIndexSettings::STANDARD);
- UNIT_ASSERT_C(ValidateSettings(settings, error), error);
+ {
+ NProtoBuf::RepeatedPtrField<TString> keyColumns;
+ UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 0 of them");
+ }
+
+ {
+ NProtoBuf::RepeatedPtrField<TString> keyColumns;
+ keyColumns.Add("text2");
+ UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column text2 settings but have text");
+ }
+
+ {
+ NProtoBuf::RepeatedPtrField<TString> keyColumns;
+ keyColumns.Add("text");
+ keyColumns.Add("text");
+ UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
+ UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 2 of them");
+ }
+
+ UNIT_ASSERT_C(ValidateSettings(keyColumns, settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "");
}
diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto
index 89cbbdef90d..f032d20d8b0 100644
--- a/ydb/core/protos/tx_datashard.proto
+++ b/ydb/core/protos/tx_datashard.proto
@@ -1787,6 +1787,43 @@ message TEvPrefixKMeansResponse {
optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 12;
}
+message TEvBuildFulltextIndexRequest {
+ optional uint64 Id = 1;
+
+ optional uint64 TabletId = 2;
+ optional NKikimrProto.TPathID PathId = 3;
+
+ optional uint64 SnapshotTxId = 4;
+ optional uint64 SnapshotStep = 5;
+
+ optional uint64 SeqNoGeneration = 6;
+ optional uint64 SeqNoRound = 7;
+
+ optional Ydb.Table.FulltextIndexSettings Settings = 8;
+
+ optional string IndexName = 9;
+
+ repeated string KeyColumns = 10;
+ repeated string DataColumns = 11;
+
+ optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 12;
+}
+
+message TEvBuildFulltextIndexResponse {
+ optional uint64 Id = 1;
+
+ optional uint64 TabletId = 2;
+ optional NKikimrProto.TPathID PathId = 3;
+
+ optional uint64 RequestSeqNoGeneration = 4;
+ optional uint64 RequestSeqNoRound = 5;
+
+ optional NKikimrIndexBuilder.EBuildStatus Status = 6;
+ repeated Ydb.Issue.IssueMessage Issues = 7;
+
+ optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 8;
+}
+
message TEvCdcStreamScanRequest {
message TLimits {
optional uint32 BatchMaxBytes = 1 [default = 512000];
diff --git a/ydb/core/tx/datashard/build_index/fulltext.cpp b/ydb/core/tx/datashard/build_index/fulltext.cpp
new file mode 100644
index 00000000000..3af00dd7f66
--- /dev/null
+++ b/ydb/core/tx/datashard/build_index/fulltext.cpp
@@ -0,0 +1,415 @@
+#include "common_helper.h"
+#include "../datashard_impl.h"
+#include "../scan_common.h"
+#include "../upload_stats.h"
+#include "../buffer_data.h"
+
+#include <ydb/core/base/appdata.h>
+#include <ydb/core/base/counters.h>
+#include <ydb/core/base/fulltext.h>
+#include <ydb/core/kqp/common/kqp_types.h>
+#include <ydb/core/scheme/scheme_tablecell.h>
+
+#include <ydb/core/tx/tx_proxy/proxy.h>
+#include <ydb/core/tx/tx_proxy/upload_rows.h>
+
+#include <ydb/core/ydb_convert/table_description.h>
+#include <ydb/core/ydb_convert/ydb_convert.h>
+#include <yql/essentials/public/issue/yql_issue_message.h>
+
+#include <util/generic/algorithm.h>
+#include <util/string/builder.h>
+
+namespace NKikimr::NDataShard {
+using namespace NTableIndex::NFulltext;
+using namespace NKikimr::NFulltext;
+
+class TBuildFulltextIndexScan: public TActor<TBuildFulltextIndexScan>, public IActorExceptionHandler, public NTable::IScan {
+ IDriver* Driver = nullptr;
+
+ ui64 TabletId = 0;
+ ui64 BuildId = 0;
+
+ ui64 ReadRows = 0;
+ ui64 ReadBytes = 0;
+
+ TTags ScanTags;
+ TString TextColumn;
+ Ydb::Table::FulltextIndexSettings::Analyzers TextAnalyzers;
+
+ TBatchRowsUploader Uploader;
+ TBufferData* UploadBuf = nullptr;
+
+ const NKikimrTxDataShard::TEvBuildFulltextIndexRequest Request;
+ const TActorId ResponseActorId;
+ const TAutoPtr<TEvDataShard::TEvBuildFulltextIndexResponse> Response;
+
+public:
+ static constexpr NKikimrServices::TActivity::EType ActorActivityType()
+ {
+ return NKikimrServices::TActivity::BUILD_FULLTEXT_INDEX;
+ }
+
+ TBuildFulltextIndexScan(ui64 tabletId, const TUserTable& table, NKikimrTxDataShard::TEvBuildFulltextIndexRequest request,
+ const TActorId& responseActorId, TAutoPtr<TEvDataShard::TEvBuildFulltextIndexResponse>&& response)
+ : TActor{&TThis::StateWork}
+ , TabletId(tabletId)
+ , BuildId{request.GetId()}
+ , Uploader(request.GetScanSettings())
+ , Request(std::move(request))
+ , ResponseActorId{responseActorId}
+ , Response{std::move(response)}
+ {
+ LOG_I("Create " << Debug());
+
+ Y_ENSURE(Request.settings().columns().size() == 1);
+ TextColumn = Request.settings().columns().at(0).column();
+ TextAnalyzers = Request.settings().columns().at(0).analyzers();
+ Y_ENSURE(Request.GetKeyColumns().size() == 1);
+ Y_ENSURE(Request.GetKeyColumns().at(0) == TextColumn);
+
+ auto tags = GetAllTags(table);
+ auto types = GetAllTypes(table);
+
+ {
+ ScanTags.push_back(tags.at(TextColumn));
+
+ for (auto dataColumn : Request.GetDataColumns()) {
+ if (dataColumn != TextColumn) {
+ ScanTags.push_back(tags.at(dataColumn));
+ }
+ }
+ }
+
+ {
+ auto uploadTypes = std::make_shared<NTxProxy::TUploadTypes>();
+ auto addType = [&](const auto& column) {
+ auto it = types.find(column);
+ if (it != types.end()) {
+ Ydb::Type type;
+ NScheme::ProtoFromTypeInfo(it->second, type);
+ uploadTypes->emplace_back(it->first, type);
+ types.erase(it);
+ }
+ };
+ {
+ Ydb::Type type;
+ type.set_type_id(TokenType);
+ uploadTypes->emplace_back(TokenColumn, type);
+ }
+ for (const auto& column : table.KeyColumnIds) {
+ addType(table.Columns.at(column).Name);
+ }
+ for (auto dataColumn : Request.GetDataColumns()) {
+ addType(dataColumn);
+ }
+ UploadBuf = Uploader.AddDestination(Request.GetIndexName(), std::move(uploadTypes));
+ }
+ }
+
+ TInitialState Prepare(IDriver* driver, TIntrusiveConstPtr<TScheme>) final
+ {
+ TActivationContext::AsActorContext().RegisterWithSameMailbox(this);
+ LOG_I("Prepare " << Debug());
+
+ Driver = driver;
+ Uploader.SetOwner(SelfId());
+
+ return {EScan::Feed, {}};
+ }
+
+ EScan Seek(TLead& lead, ui64 seq) final
+ {
+ LOG_T("Seek " << seq << " " << Debug());
+
+ if (seq) {
+ return Uploader.CanFinish()
+ ? EScan::Final
+ : EScan::Sleep;
+ }
+
+ lead.To(ScanTags, {}, NTable::ESeek::Lower);
+
+ return EScan::Feed;
+ }
+
+ EScan Feed(TArrayRef<const TCell> key, const TRow& row) final
+ {
+ // LOG_T("Feed " << Debug());
+
+ ++ReadRows;
+ ReadBytes += CountRowCellBytes(key, *row);
+
+ TVector<TCell> uploadKey(::Reserve(key.size() + 1));
+ TVector<TCell> uploadValue(::Reserve(Request.GetDataColumns().size()));
+
+ TString text((*row).at(0).AsBuf());
+ auto tokens = Analyze(text, TextAnalyzers);
+ for (const auto& token : tokens) {
+ uploadKey.clear();
+ uploadKey.push_back(TCell(token));
+ uploadKey.insert(uploadKey.end(), key.begin(), key.end());
+
+ uploadValue.clear();
+ size_t index = 1; // skip text column
+ for (auto dataColumn : Request.GetDataColumns()) {
+ if (dataColumn != TextColumn) {
+ uploadValue.push_back(row.Get(index++));
+ } else {
+ uploadValue.push_back(TCell(text));
+ }
+ }
+
+ UploadBuf->AddRow(uploadKey, uploadValue);
+ }
+
+ return Uploader.ShouldWaitUpload() ? EScan::Sleep : EScan::Feed;
+ }
+
+ EScan PageFault() final
+ {
+ LOG_T("PageFault " << Debug());
+ return EScan::Feed;
+ }
+
+ EScan Exhausted() final
+ {
+ LOG_T("Exhausted " << Debug());
+
+ // call Seek to wait uploads
+ return EScan::Reset;
+ }
+
+ TAutoPtr<IDestructable> Finish(const std::exception& exc) final
+ {
+ Uploader.AddIssue(exc);
+ return Finish(EStatus::Exception);
+ }
+
+ TAutoPtr<IDestructable> Finish(EStatus status) final
+ {
+ auto& record = Response->Record;
+ record.MutableMeteringStats()->SetReadRows(ReadRows);
+ record.MutableMeteringStats()->SetReadBytes(ReadBytes);
+ record.MutableMeteringStats()->SetCpuTimeUs(Driver->GetTotalCpuTimeUs());
+
+ Uploader.Finish(record, status);
+
+ if (Response->Record.GetStatus() == NKikimrIndexBuilder::DONE) {
+ LOG_N("Done " << Debug() << " " << Response->Record.ShortDebugString());
+ } else {
+ LOG_E("Failed " << Debug() << " " << Response->Record.ShortDebugString());
+ }
+ Send(ResponseActorId, Response.Release());
+
+ Driver = nullptr;
+ this->PassAway();
+ return nullptr;
+ }
+
+ bool OnUnhandledException(const std::exception& exc) final
+ {
+ if (!Driver) {
+ return false;
+ }
+ Driver->Throw(exc);
+ return true;
+ }
+
+ void Describe(IOutputStream& out) const final
+ {
+ out << Debug();
+ }
+
+protected:
+ STFUNC(StateWork)
+ {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvTxUserProxy::TEvUploadRowsResponse, Handle);
+ CFunc(TEvents::TSystem::Wakeup, HandleWakeup);
+ default:
+ LOG_E("StateWork unexpected event type: " << ev->GetTypeRewrite()
+ << " event: " << ev->ToString() << " " << Debug());
+ }
+ }
+
+ void HandleWakeup(const NActors::TActorContext& /*ctx*/)
+ {
+ LOG_D("Retry upload " << Debug());
+
+ Uploader.RetryUpload();
+ }
+
+ void Handle(TEvTxUserProxy::TEvUploadRowsResponse::TPtr& ev, const TActorContext& ctx)
+ {
+ LOG_D("Handle TEvUploadRowsResponse " << Debug()
+ << " ev->Sender: " << ev->Sender.ToString());
+
+ if (!Driver) {
+ return;
+ }
+
+ Uploader.Handle(ev);
+
+ if (Uploader.GetUploadStatus().IsSuccess()) {
+ Driver->Touch(EScan::Feed);
+ return;
+ }
+
+ if (auto retryAfter = Uploader.GetRetryAfter(); retryAfter) {
+ LOG_N("Got retriable error, " << Debug() << " " << Uploader.GetUploadStatus().ToString());
+ ctx.Schedule(*retryAfter, new TEvents::TEvWakeup());
+ return;
+ }
+
+ LOG_N("Got error, abort scan, " << Debug() << " " << Uploader.GetUploadStatus().ToString());
+
+ Driver->Touch(EScan::Final);
+ }
+
+ TString Debug() const
+ {
+ return TStringBuilder() << "TBuildFulltextIndexScan TabletId: " << TabletId << " Id: " << BuildId
+ << " " << Uploader.Debug();
+ }
+};
+
+class TDataShard::TTxHandleSafeBuildFulltextIndexScan final: public NTabletFlatExecutor::TTransactionBase<TDataShard> {
+public:
+ TTxHandleSafeBuildFulltextIndexScan(TDataShard* self, TEvDataShard::TEvBuildFulltextIndexRequest::TPtr&& ev)
+ : TTransactionBase(self)
+ , Ev(std::move(ev))
+ {
+ }
+
+ bool Execute(TTransactionContext&, const TActorContext& ctx) final
+ {
+ Self->HandleSafe(Ev, ctx);
+ return true;
+ }
+
+ void Complete(const TActorContext&) final
+ {
+ }
+
+private:
+ TEvDataShard::TEvBuildFulltextIndexRequest::TPtr Ev;
+};
+
+void TDataShard::Handle(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext&)
+{
+ Execute(new TTxHandleSafeBuildFulltextIndexScan(this, std::move(ev)));
+}
+
+void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx)
+{
+ auto& request = ev->Get()->Record;
+ const ui64 id = request.GetId();
+ TRowVersion rowVersion(request.GetSnapshotStep(), request.GetSnapshotTxId());
+ TScanRecord::TSeqNo seqNo = {request.GetSeqNoGeneration(), request.GetSeqNoRound()};
+
+ try {
+ auto response = MakeHolder<TEvDataShard::TEvBuildFulltextIndexResponse>();
+ FillScanResponseCommonFields(*response, id, TabletID(), seqNo);
+
+ LOG_N("Starting TBuildFulltextIndexScan TabletId: " << TabletID()
+ << " " << request.ShortDebugString()
+ << " row version " << rowVersion);
+
+ // Note: it's very unlikely that we have volatile txs before this snapshot
+ if (VolatileTxManager.HasVolatileTxsAtSnapshot(rowVersion)) {
+ VolatileTxManager.AttachWaitingSnapshotEvent(rowVersion, std::unique_ptr<IEventHandle>(ev.Release()));
+ return;
+ }
+
+ auto badRequest = [&](const TString& error) {
+ response->Record.SetStatus(NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST);
+ auto issue = response->Record.AddIssues();
+ issue->set_severity(NYql::TSeverityIds::S_ERROR);
+ issue->set_message(error);
+ };
+ auto trySendBadRequest = [&] {
+ if (response->Record.GetStatus() == NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST) {
+ LOG_E("Rejecting TBuildFulltextIndexScan bad request TabletId: " << TabletID()
+ << " " << request.ShortDebugString()
+ << " with response " << response->Record.ShortDebugString());
+ ctx.Send(ev->Sender, std::move(response));
+ return true;
+ } else {
+ return false;
+ }
+ };
+
+ // 1. Validating table and path existence
+ if (request.GetTabletId() != TabletID()) {
+ badRequest(TStringBuilder() << "Wrong shard " << request.GetTabletId() << " this is " << TabletID());
+ }
+ if (!IsStateActive()) {
+ badRequest(TStringBuilder() << "Shard " << TabletID() << " is " << State << " and not ready for requests");
+ }
+ const auto pathId = TPathId::FromProto(request.GetPathId());
+ const auto* userTableIt = GetUserTables().FindPtr(pathId.LocalPathId);
+ if (!userTableIt) {
+ badRequest(TStringBuilder() << "Unknown table id: " << pathId.LocalPathId);
+ }
+ if (trySendBadRequest()) {
+ return;
+ }
+ const auto& userTable = **userTableIt;
+
+ // 2. Validating request fields
+ if (!request.HasSnapshotStep() || !request.HasSnapshotTxId()) {
+ badRequest(TStringBuilder() << "Missing snapshot");
+ } else {
+ const TSnapshotKey snapshotKey(pathId, rowVersion.Step, rowVersion.TxId);
+ if (!SnapshotManager.FindAvailable(snapshotKey)) {
+ badRequest(TStringBuilder() << "Unknown snapshot for path id " << pathId.OwnerId << ":" << pathId.LocalPathId
+ << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId);
+ }
+ }
+
+ if (!request.GetIndexName()) {
+ badRequest(TStringBuilder() << "Empty index table name");
+ }
+
+ auto tags = GetAllTags(userTable);
+ for (auto keyColumn : request.GetKeyColumns()) {
+ if (!tags.contains(keyColumn)) {
+ badRequest(TStringBuilder() << "Unknown key column: " << keyColumn);
+ }
+ }
+ for (auto dataColumn : request.GetDataColumns()) {
+ if (!tags.contains(dataColumn)) {
+ badRequest(TStringBuilder() << "Unknown data column: " << dataColumn);
+ }
+ }
+
+ if (trySendBadRequest()) {
+ return;
+ }
+
+ // 3. Validating fulltext index settings
+ if (!request.HasSettings()) {
+ badRequest(TStringBuilder() << "Missing fulltext index settings");
+ } else {
+ TString error;
+ if (!NKikimr::NFulltext::ValidateSettings(request.keycolumns(), request.GetSettings(), error)) {
+ badRequest(error);
+ }
+ }
+
+ if (trySendBadRequest()) {
+ return;
+ }
+
+ // 4. Creating scan
+ TAutoPtr<NTable::IScan> scan = new TBuildFulltextIndexScan(TabletID(), userTable,
+ request, ev->Sender, std::move(response));
+
+ StartScan(this, std::move(scan), id, seqNo, rowVersion, userTable.LocalTid);
+ } catch (const std::exception& exc) {
+ FailScan<TEvDataShard::TEvBuildFulltextIndexResponse>(id, TabletID(), ev->Sender, seqNo, exc, "TBuildFulltextIndexScan");
+ }
+}
+
+}
diff --git a/ydb/core/tx/datashard/build_index/secondary_index.cpp b/ydb/core/tx/datashard/build_index/secondary_index.cpp
index 20e68c03a5b..40342b06156 100644
--- a/ydb/core/tx/datashard/build_index/secondary_index.cpp
+++ b/ydb/core/tx/datashard/build_index/secondary_index.cpp
@@ -577,12 +577,13 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildIndexCreateRequest::TPtr& ev,
// 2. Validating request fields
if (!request.HasSnapshotStep() || !request.HasSnapshotTxId()) {
- badRequest(TStringBuilder() << "Empty snapshot");
- }
- const TSnapshotKey snapshotKey(tableId.PathId, rowVersion.Step, rowVersion.TxId);
- if (!SnapshotManager.FindAvailable(snapshotKey)) {
- badRequest(TStringBuilder() << "Unknown snapshot for path id " << tableId.PathId.OwnerId << ":" << tableId.PathId.LocalPathId
- << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId);
+ badRequest(TStringBuilder() << "Missing snapshot");
+ } else {
+ const TSnapshotKey snapshotKey(tableId.PathId, rowVersion.Step, rowVersion.TxId);
+ if (!SnapshotManager.FindAvailable(snapshotKey)) {
+ badRequest(TStringBuilder() << "Unknown snapshot for path id " << tableId.PathId.OwnerId << ":" << tableId.PathId.LocalPathId
+ << ", snapshot step is " << snapshotKey.Step << ", snapshot tx is " << snapshotKey.TxId);
+ }
}
TSerializedTableRange requestedRange;
diff --git a/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp
new file mode 100644
index 00000000000..39a9b67eee8
--- /dev/null
+++ b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp
@@ -0,0 +1,367 @@
+#include "ut_helpers.h"
+
+#include <ydb/core/base/table_index.h>
+#include <ydb/core/protos/index_builder.pb.h>
+#include <ydb/core/testlib/test_client.h>
+#include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h>
+#include <ydb/core/tx/schemeshard/schemeshard.h>
+#include <ydb/core/tx/tx_proxy/proxy.h>
+#include <ydb/core/tx/tx_proxy/upload_rows.h>
+
+#include <yql/essentials/public/issue/yql_issue_message.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NKikimr {
+using namespace Tests;
+using Ydb::Table::FulltextIndexSettings;
+using namespace NTableIndex::NFulltext;
+
+static std::atomic<ui64> sId = 1;
+static const TString kMainTable = "/Root/table-main";
+static const TString kIndexTable = "/Root/table-index";
+
+Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) {
+
+ ui64 FillRequest(Tests::TServer::TPtr server, TActorId sender,
+ NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request,
+ std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest)
+ {
+ auto id = sId.fetch_add(1, std::memory_order_relaxed);
+
+ auto snapshot = CreateVolatileSnapshot(server, {kMainTable});
+ auto datashards = GetTableShards(server, sender, kMainTable);
+ TTableId tableId = ResolveTableId(server, sender, kMainTable);
+
+ UNIT_ASSERT(datashards.size() == 1);
+
+ request.SetId(1);
+ request.SetSeqNoGeneration(id);
+ request.SetSeqNoRound(1);
+
+ request.SetTabletId(datashards[0]);
+ tableId.PathId.ToProto(request.MutablePathId());
+
+ request.SetSnapshotTxId(snapshot.TxId);
+ request.SetSnapshotStep(snapshot.Step);
+
+ FulltextIndexSettings settings;
+ settings.set_layout(FulltextIndexSettings::FLAT);
+ auto column = settings.add_columns();
+ column->set_column("text");
+ column->mutable_analyzers()->set_tokenizer(FulltextIndexSettings::WHITESPACE);
+ *request.MutableSettings() = settings;
+
+ request.SetIndexName(kIndexTable);
+
+ request.AddKeyColumns("text");
+
+ setupRequest(request);
+
+ return datashards[0];
+ }
+
+ void DoBadRequest(Tests::TServer::TPtr server, TActorId sender,
+ std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest,
+ const TString& expectedError, bool expectedErrorSubstring = false, NKikimrIndexBuilder::EBuildStatus expectedStatus = NKikimrIndexBuilder::EBuildStatus::BAD_REQUEST)
+ {
+ auto ev = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>();
+
+ auto tabletId = FillRequest(server, sender, ev->Record, setupRequest);
+
+ NKikimr::DoBadRequest<TEvDataShard::TEvBuildFulltextIndexResponse>(server, sender, std::move(ev), tabletId, expectedError, expectedErrorSubstring, expectedStatus);
+ }
+
+ TString DoBuild(Tests::TServer::TPtr server, TActorId sender, std::function<void(NKikimrTxDataShard::TEvBuildFulltextIndexRequest&)> setupRequest) {
+ auto ev1 = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>();
+ auto tabletId = FillRequest(server, sender, ev1->Record, setupRequest);
+
+ auto ev2 = std::make_unique<TEvDataShard::TEvBuildFulltextIndexRequest>();
+ ev2->Record.CopyFrom(ev1->Record);
+
+ auto& runtime = *server->GetRuntime();
+ runtime.SendToPipe(tabletId, sender, ev1.release(), 0, GetPipeConfigWithRetries());
+ runtime.SendToPipe(tabletId, sender, ev2.release(), 0, GetPipeConfigWithRetries());
+
+ TAutoPtr<IEventHandle> handle;
+ auto reply = runtime.GrabEdgeEventRethrow<TEvDataShard::TEvBuildFulltextIndexResponse>(handle);
+
+ UNIT_ASSERT_EQUAL_C(reply->Record.GetStatus(), NKikimrIndexBuilder::EBuildStatus::DONE, reply->Record.ShortDebugString());
+
+ auto index = ReadShardedTable(server, kIndexTable);
+ Cerr << "Index:" << Endl;
+ Cerr << index << Endl;
+ return std::move(index);
+ }
+
+ void CreateMainTable(Tests::TServer::TPtr server, TActorId sender) {
+ TShardedTableOptions options;
+ options.EnableOutOfOrder(true);
+ options.Shards(1);
+ options.AllowSystemColumnNames(false);
+ options.Columns({
+ {"key", "Uint32", true, true},
+ {"text", "String", false, false},
+ {"data", "String", false, false},
+ });
+ CreateShardedTable(server, sender, "/Root", "table-main", options);
+ }
+
+ void FillMainTable(Tests::TServer::TPtr server, TActorId sender) {
+ ExecSQL(server, sender, R"(
+ UPSERT INTO `/Root/table-main` (key, text, data) VALUES
+ (1, "green apple", "one"),
+ (2, "red apple", "two"),
+ (3, "yellow apple", "three"),
+ (4, "red car", "four")
+ )");
+ }
+
+ void CreateIndexTable(Tests::TServer::TPtr server, TActorId sender) {
+ TShardedTableOptions options;
+ options.EnableOutOfOrder(true);
+ options.Shards(1);
+ options.AllowSystemColumnNames(true);
+ options.Columns({
+ {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true},
+ {"key", "Uint32", true, true},
+ {"data", "String", false, false},
+ });
+ CreateShardedTable(server, sender, "/Root", "table-index", options);
+ }
+
+ void Setup(Tests::TServer::TPtr server, TActorId sender) {
+ server->GetRuntime()->SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG);
+ server->GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE);
+
+ InitRoot(server, sender);
+
+ CreateMainTable(server, sender);
+ FillMainTable(server, sender);
+ CreateIndexTable(server, sender);
+ }
+
+ Y_UNIT_TEST(BadRequest) {
+ TPortManager pm;
+ TServerSettings serverSettings(pm.GetPort(2134));
+ serverSettings.SetDomainName("Root");
+
+ Tests::TServer::TPtr server = new TServer(serverSettings);
+ auto sender = server->GetRuntime()->AllocateEdgeActor();
+
+ Setup(server, sender);
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.SetTabletId(0);
+ }, TStringBuilder() << "{ <main>: Error: Wrong shard 0 this is " << GetTableShards(server, sender, kMainTable)[0] << " }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ TPathId(0, 0).ToProto(request.MutablePathId());
+ }, "{ <main>: Error: Unknown table id: 0 }");
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.SetSnapshotStep(request.GetSnapshotStep() + 1);
+ }, "Error: Unknown snapshot", true);
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearSnapshotStep();
+ }, "{ <main>: Error: Missing snapshot }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.SetSnapshotTxId(request.GetSnapshotTxId() + 1);
+ }, "Error: Unknown snapshot", true);
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearSnapshotTxId();
+ }, "{ <main>: Error: Missing snapshot }");
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.clear_settings();
+ }, "{ <main>: Error: Missing fulltext index settings }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.MutableSettings()->clear_columns();
+ }, "{ <main>: Error: fulltext index should have a single text key column settings but have 0 of them }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.MutableSettings()->mutable_columns()->at(0).mutable_analyzers()->clear_tokenizer();
+ }, "{ <main>: Error: tokenizer should be set }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.MutableSettings()->mutable_columns()->at(0).set_column("data");
+ }, "{ <main>: Error: fulltext index should have a single text key column text settings but have data }");
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearIndexName();
+ }, "{ <main>: Error: Empty index table name }");
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearKeyColumns();
+ }, "{ <main>: Error: fulltext index should have a single text key column but have 0 of them }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearKeyColumns();
+ request.AddKeyColumns("some");
+ }, "{ <main>: Error: Unknown key column: some }");
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.AddDataColumns("some");
+ }, "{ <main>: Error: Unknown data column: some }");
+
+ // test multiple issues:
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
+ request.ClearIndexName();
+ request.ClearKeyColumns();
+ request.AddKeyColumns("some");
+ }, "[ { <main>: Error: Empty index table name } { <main>: Error: Unknown key column: some } ]");
+ }
+
+ Y_UNIT_TEST(Build) {
+ TPortManager pm;
+ TServerSettings serverSettings(pm.GetPort(2134));
+ serverSettings.SetDomainName("Root");
+
+ Tests::TServer::TPtr server = new TServer(serverSettings);
+ auto sender = server->GetRuntime()->AllocateEdgeActor();
+
+ Setup(server, sender);
+
+ auto result = DoBuild(server, sender, [](auto&){});
+
+ UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, data = (empty maybe)
+__ydb_token = apple, key = 2, data = (empty maybe)
+__ydb_token = apple, key = 3, data = (empty maybe)
+__ydb_token = car, key = 4, data = (empty maybe)
+__ydb_token = green, key = 1, data = (empty maybe)
+__ydb_token = red, key = 2, data = (empty maybe)
+__ydb_token = red, key = 4, data = (empty maybe)
+__ydb_token = yellow, key = 3, data = (empty maybe)
+)");
+ }
+
+ Y_UNIT_TEST(BuildWithData) {
+ TPortManager pm;
+ TServerSettings serverSettings(pm.GetPort(2134));
+ serverSettings.SetDomainName("Root");
+
+ Tests::TServer::TPtr server = new TServer(serverSettings);
+ auto sender = server->GetRuntime()->AllocateEdgeActor();
+
+ Setup(server, sender);
+
+ auto result = DoBuild(server, sender, [](auto& request) {
+ request.AddDataColumns("data");
+ });
+
+ UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, data = one
+__ydb_token = apple, key = 2, data = two
+__ydb_token = apple, key = 3, data = three
+__ydb_token = car, key = 4, data = four
+__ydb_token = green, key = 1, data = one
+__ydb_token = red, key = 2, data = two
+__ydb_token = red, key = 4, data = four
+__ydb_token = yellow, key = 3, data = three
+)");
+ }
+
+ Y_UNIT_TEST(BuildWithTextData) {
+ TPortManager pm;
+ TServerSettings serverSettings(pm.GetPort(2134));
+ serverSettings.SetDomainName("Root");
+
+ Tests::TServer::TPtr server = new TServer(serverSettings);
+ auto sender = server->GetRuntime()->AllocateEdgeActor();
+
+ InitRoot(server, sender);
+
+ CreateMainTable(server, sender);
+ FillMainTable(server, sender);
+
+ { // CreateIndexTable with text column
+ TShardedTableOptions options;
+ options.EnableOutOfOrder(true);
+ options.Shards(1);
+ options.AllowSystemColumnNames(true);
+ options.Columns({
+ {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true},
+ {"key", "Uint32", true, true},
+ {"text", "String", false, false},
+ {"data", "String", false, false},
+ });
+ CreateShardedTable(server, sender, "/Root", "table-index", options);
+ }
+
+ auto result = DoBuild(server, sender, [](auto& request) {
+ request.AddDataColumns("text");
+ request.AddDataColumns("data");
+ });
+
+ UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, text = green apple, data = one
+__ydb_token = apple, key = 2, text = red apple, data = two
+__ydb_token = apple, key = 3, text = yellow apple, data = three
+__ydb_token = car, key = 4, text = red car, data = four
+__ydb_token = green, key = 1, text = green apple, data = one
+__ydb_token = red, key = 2, text = red apple, data = two
+__ydb_token = red, key = 4, text = red car, data = four
+__ydb_token = yellow, key = 3, text = yellow apple, data = three
+)");
+ }
+
+ Y_UNIT_TEST(BuildWithTextFromKey) {
+ TPortManager pm;
+ TServerSettings serverSettings(pm.GetPort(2134));
+ serverSettings.SetDomainName("Root");
+
+ Tests::TServer::TPtr server = new TServer(serverSettings);
+ auto sender = server->GetRuntime()->AllocateEdgeActor();
+
+ server->GetRuntime()->SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG);
+ server->GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE);
+
+ InitRoot(server, sender);
+
+ { // CreateMainTable
+ TShardedTableOptions options;
+ options.EnableOutOfOrder(true);
+ options.Shards(1);
+ options.AllowSystemColumnNames(false);
+ options.Columns({
+ {"key", "Uint32", true, true},
+ {"text", "String", true, true},
+ {"subkey", "Uint32", true, true},
+ {"data", "String", false, false},
+ });
+ CreateShardedTable(server, sender, "/Root", "table-main", options);
+ }
+ { // FillMainTable
+ ExecSQL(server, sender, R"(
+ UPSERT INTO `/Root/table-main` (key, text, subkey, data) VALUES
+ (1, "green apple", 11, "one"),
+ (2, "red apple", 22, "two"),
+ (3, "yellow apple", 33, "three"),
+ (4, "red car", 44, "four")
+ )");
+ }
+ { // CreateIndexTable
+ TShardedTableOptions options;
+ options.EnableOutOfOrder(true);
+ options.Shards(1);
+ options.AllowSystemColumnNames(true);
+ options.Columns({
+ {TokenColumn, NTableIndex::NFulltext::TokenTypeName, true, true},
+ {"key", "Uint32", true, true},
+ {"text", "String", true, true},
+ {"subkey", "Uint32", true, true},
+ {"data", "String", false, false},
+ });
+ CreateShardedTable(server, sender, "/Root", "table-index", options);
+ }
+
+ auto result = DoBuild(server, sender, [](auto& request) {
+ request.AddDataColumns("data");
+ });
+
+ UNIT_ASSERT_VALUES_EQUAL(result, R"(__ydb_token = apple, key = 1, text = green apple, subkey = 11, data = one
+__ydb_token = apple, key = 2, text = red apple, subkey = 22, data = two
+__ydb_token = apple, key = 3, text = yellow apple, subkey = 33, data = three
+__ydb_token = car, key = 4, text = red car, subkey = 44, data = four
+__ydb_token = green, key = 1, text = green apple, subkey = 11, data = one
+__ydb_token = red, key = 2, text = red apple, subkey = 22, data = two
+__ydb_token = red, key = 4, text = red car, subkey = 44, data = four
+__ydb_token = yellow, key = 3, text = yellow apple, subkey = 33, data = three
+)");
+ }
+}
+
+}
diff --git a/ydb/core/tx/datashard/build_index/ut/ya.make b/ydb/core/tx/datashard/build_index/ut/ya.make
index 8da7981617c..b71af3fef00 100644
--- a/ydb/core/tx/datashard/build_index/ut/ya.make
+++ b/ydb/core/tx/datashard/build_index/ut/ya.make
@@ -27,6 +27,7 @@ PEERDIR(
YQL_LAST_ABI_VERSION()
SRCS(
+ ut_fulltext.cpp
ut_local_kmeans.cpp
ut_prefix_kmeans.cpp
ut_recompute_kmeans.cpp
diff --git a/ydb/core/tx/datashard/datashard.h b/ydb/core/tx/datashard/datashard.h
index 47becb18023..dabc4175395 100644
--- a/ydb/core/tx/datashard/datashard.h
+++ b/ydb/core/tx/datashard/datashard.h
@@ -367,6 +367,9 @@ namespace TEvDataShard {
EvIncrementalRestoreResponse,
+ EvBuildFulltextIndexRequest,
+ EvBuildFulltextIndexResponse,
+
EvEnd
};
@@ -1560,6 +1563,18 @@ namespace TEvDataShard {
TEvDataShard::EvPrefixKMeansResponse> {
};
+ struct TEvBuildFulltextIndexRequest
+ : public TEventPB<TEvBuildFulltextIndexRequest,
+ NKikimrTxDataShard::TEvBuildFulltextIndexRequest,
+ TEvDataShard::EvBuildFulltextIndexRequest> {
+ };
+
+ struct TEvBuildFulltextIndexResponse
+ : public TEventPB<TEvBuildFulltextIndexResponse,
+ NKikimrTxDataShard::TEvBuildFulltextIndexResponse,
+ TEvDataShard::EvBuildFulltextIndexResponse> {
+ };
+
struct TEvIncrementalRestoreResponse
: public TEventPB<TEvIncrementalRestoreResponse,
NKikimrTxDataShard::TEvIncrementalRestoreResponse,
diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h
index 52f48f77cd6..29a967d6aaf 100644
--- a/ydb/core/tx/datashard/datashard_impl.h
+++ b/ydb/core/tx/datashard/datashard_impl.h
@@ -241,6 +241,7 @@ class TDataShard
class TTxHandleSafeReshuffleKMeansScan;
class TTxHandleSafeRecomputeKMeansScan;
class TTxHandleSafeStatisticsScan;
+ class TTxHandleSafeBuildFulltextIndexScan;
class TTxMediatorStateRestored;
@@ -1342,6 +1343,8 @@ class TDataShard
void HandleSafe(TEvDataShard::TEvLocalKMeansRequest::TPtr& ev, const TActorContext& ctx);
void Handle(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx);
void HandleSafe(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx);
+ void HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev, const TActorContext& ctx);
void Handle(TEvDataShard::TEvCdcStreamScanRequest::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPrivate::TEvCdcStreamScanRegistered::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPrivate::TEvCdcStreamScanProgress::TPtr& ev, const TActorContext& ctx);
@@ -3225,6 +3228,7 @@ protected:
HFunc(TEvDataShard::TEvRecomputeKMeansRequest, Handle);
HFunc(TEvDataShard::TEvLocalKMeansRequest, Handle);
HFunc(TEvDataShard::TEvPrefixKMeansRequest, Handle);
+ HFunc(TEvDataShard::TEvBuildFulltextIndexRequest, Handle);
HFunc(TEvDataShard::TEvCdcStreamScanRequest, Handle);
HFunc(TEvPrivate::TEvCdcStreamScanRegistered, Handle);
HFunc(TEvPrivate::TEvCdcStreamScanProgress, Handle);
diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make
index c210e96e4f9..ee92b38d455 100644
--- a/ydb/core/tx/datashard/ya.make
+++ b/ydb/core/tx/datashard/ya.make
@@ -218,13 +218,14 @@ SRCS(
wait_for_plan_unit.cpp
wait_for_stream_clearance_unit.cpp
- build_index/prefix_kmeans.cpp
+ build_index/fulltext.cpp
build_index/kmeans_helper.cpp
build_index/local_kmeans.cpp
- build_index/sample_k.cpp
- build_index/secondary_index.cpp
+ build_index/prefix_kmeans.cpp
build_index/recompute_kmeans.cpp
build_index/reshuffle_kmeans.cpp
+ build_index/sample_k.cpp
+ build_index/secondary_index.cpp
build_index/unique_index.cpp
)
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp
index fc648c9dda8..4ee836c4c14 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp
@@ -153,7 +153,7 @@ TVector<ISubOperation::TPtr> CreateIndexedTable(TOperationId nextId, const TTxTr
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")};
}
TString msg;
- if (!NKikimr::NFulltext::ValidateSettings(indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) {
+ if (!NKikimr::NFulltext::ValidateSettings(indexDescription.keycolumnnames(), indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) {
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, msg)};
}
break;
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp
index 0b83aae3012..6dc406e0552 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp
@@ -282,7 +282,7 @@ private:
buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext;
NKikimrSchemeOp::TFulltextIndexDescription fulltextIndexDescription;
*fulltextIndexDescription.MutableSettings() = index.global_fulltext_index().fulltext_settings();
- if (!NKikimr::NFulltext::ValidateSettings(fulltextIndexDescription.GetSettings(), explain)) {
+ if (!NKikimr::NFulltext::ValidateSettings(index.index_columns(), fulltextIndexDescription.GetSettings(), explain)) {
return false;
}
buildInfo.SpecializedIndexDescription = fulltextIndexDescription;
diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h
index 9b089450927..ad99fca8054 100644
--- a/ydb/core/tx/schemeshard/schemeshard_utils.h
+++ b/ydb/core/tx/schemeshard/schemeshard_utils.h
@@ -176,13 +176,13 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat
return false;
}
- const TString& indexColumnName = indexKeys.KeyColumns.back();
- Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName));
- auto typeInfo = baseColumnTypes.at(indexColumnName);
+ const TString& embeddingColumnName = indexKeys.KeyColumns.back();
+ Y_ABORT_UNLESS(baseColumnTypes.contains(embeddingColumnName));
+ auto typeInfo = baseColumnTypes.at(embeddingColumnName);
if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
- error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
+ error = TStringBuilder() << "Embedding column '" << embeddingColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
return false;
}
break;
@@ -193,30 +193,18 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat
if (indexKeys.KeyColumns.size() > 1) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
- error = TStringBuilder() << "fulltext index can only have a single key text column";
+ error = TStringBuilder() << "fulltext index should have a single text key column";
return false;
}
- if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() != 1) {
- status = NKikimrScheme::EStatus::StatusInvalidParameter;
- error = TStringBuilder() << "fulltext index should have single '" << indexKeys.KeyColumns.at(0) << "' column settings"
- << " but have " << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() << " of them";
- return false;
- }
- if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() != indexKeys.KeyColumns.at(0)) {
- status = NKikimrScheme::EStatus::StatusInvalidParameter;
- error = TStringBuilder() << "fulltext index should have '" << indexKeys.KeyColumns.at(0) << "' column settings"
- << " but have '" << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() << "' column settings";
- return false;
- }
-
- const TString& indexColumnName = indexKeys.KeyColumns.back();
- Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName));
- auto typeInfo = baseColumnTypes.at(indexColumnName);
+
+ const TString& textColumnName = indexKeys.KeyColumns.at(0);
+ Y_ABORT_UNLESS(baseColumnTypes.contains(textColumnName));
+ auto typeInfo = baseColumnTypes.at(textColumnName);
// TODO: support utf-8 in fulltext index
if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
- error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
+ error = TStringBuilder() << "Text column '" << textColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
return false;
}
break;
diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto
index 93c74186a9c..3941e270baa 100644
--- a/ydb/library/services/services.proto
+++ b/ydb/library/services/services.proto
@@ -1119,5 +1119,6 @@ message TActivity {
SCHEME_BOARD_RESTORE_ACTOR = 669;
REPLICATION_CONTROLLER_RESOURCE_ID_RESOLVER = 670;
BS_VDISK_METADATA_ACTOR = 671;
+ BUILD_FULLTEXT_INDEX = 672;
};
};