From 1110808a9d39d4b808aef724c861a2e1a38d2a69 Mon Sep 17 00:00:00 2001
From: Devtools Arcadia
Date: Mon, 7 Feb 2022 18:08:42 +0300
Subject: intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
---
 library/cpp/diff/diff.cpp | 87 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 library/cpp/diff/diff.cpp

(limited to 'library/cpp/diff/diff.cpp')

diff --git a/library/cpp/diff/diff.cpp b/library/cpp/diff/diff.cpp
new file mode 100644
index 00000000000..be57da7f396
--- /dev/null
+++ b/library/cpp/diff/diff.cpp
@@ -0,0 +1,87 @@
+#include "diff.h"
+// NOTE(review): everything that was written inside angle brackets (include targets, template parameter and argument lists) appears to be missing from this copy of the patch; restore it from the upstream file before applying.
+#include
+#include
+
+#include
+// Collects ranges of T: every non-empty range handed to Consume is stored as a view in Words, and a FnvHash of its bytes is stored at the same index in Keys.
+template
+struct TCollectionImpl {
+    TVector> Words; // views of the consumed ranges, in the order they were consumed
+    TVector Keys; // Keys[i] is the hash of Words[i]; element type is not visible in this copy
+    // Records the range [b, e) if it is non-empty. The third pointer argument is ignored. Always returns true.
+    inline bool Consume(const T* b, const T* e, const T*) {
+        if (b < e) {
+            Words.push_back(TConstArrayRef(b, e));
+            Keys.push_back(FnvHash((const char*)b, (e - b) * sizeof(T))); // hashes the raw bytes of the range: element count times sizeof(T)
+        }
+        return true;
+    }
+    // Converts a sub-range of Keys back into the text it stands for. keys must point into this object's Keys storage, because positions are computed relative to Keys.data(). An empty keys range yields an empty result.
+    TConstArrayRef Remap(const TConstArrayRef& keys) const {
+        if (keys.empty()) {
+            return TConstArrayRef();
+        }
+        auto firstWordPos = std::distance(Keys.data(), keys.begin());
+        auto lastWordPos = std::distance(Keys.data(), keys.end()) - 1; // index of the last key in the range; end() is one past it
+        Y_ASSERT(firstWordPos >= 0);
+        Y_ASSERT(lastWordPos >= firstWordPos);
+        Y_ASSERT(static_cast(lastWordPos) < Words.size());
+        // Result spans from the start of the first word to the end of the last one; assumes all words are pieces of one contiguous buffer, stored in order (TODO confirm).
+        return TConstArrayRef(Words[firstWordPos].begin(), Words[lastWordPos].end());
+    }
+    // Returns a view over all stored keys.
+    TConstArrayRef GetKeys() const {
+        return TConstArrayRef(Keys);
+    }
+};
+// Primary template has no members; the explicit specializations below add the constructors.
+template
+struct TCollection {
+};
+// Specialization built from TStringBuf text and TString delimiters: runs SplitString over str with this object as the consumer target. NOTE(review): presumably TKeepDelimiters forwards both tokens and delimiter runs to Consume; confirm against its definition.
+template <>
+struct TCollection: public TCollectionImpl {
+    TCollection(const TStringBuf& str, const TString& delims) {
+        TSetDelimiter set(delims.data()); // delims is passed as a bare pointer, without its length
+        TKeepDelimiters> c(this);
+        SplitString(str.begin(), str.end(), set, c);
+    }
+};
+// Same construction for TWtringBuf text and TUtf16String delimiters.
+template <>
+struct TCollection: public TCollectionImpl {
+    TCollection(const TWtringBuf& str, const TUtf16String& delims) {
+        TSetDelimiter set(delims.data()); // delims is passed as a bare pointer, without its length
+        TKeepDelimiters> c(this);
+        SplitString(str.begin(), str.end(), set, c);
+    }
+};
+// Diffs left against right and appends the result to chunks. With empty delims both inputs are passed as whole arrays to another InlineDiff; otherwise each side is split into pieces, the pieces' hash keys are diffed, and every resulting chunk is remapped to text. Returns the value of the underlying InlineDiff call.
+size_t NDiff::InlineDiff(TVector>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims) {
+    if (delims.empty()) {
+        return InlineDiff(chunks, TConstArrayRef(left.data(), left.size()), TConstArrayRef(right.data(), right.size()));
+    }
+    TCollection c1(left, delims);
+    TCollection c2(right, delims);
+    TVector> diff; // chunks expressed as ranges of keys rather than text
+    const size_t dist = InlineDiff(diff, c1.GetKeys(), c2.GetKeys()); // NOTE(review): only hash keys are compared here, so two different pieces with equal hashes would be treated as identical
+    for (const auto& it : diff) {
+        chunks.push_back(TChunk(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common))); // Left and Common are remapped through c1 (left input), Right through c2 (right input)
+    }
+    return dist;
+}
+// Counterpart of the overload above for TWtringBuf text and TUtf16String delimiters; performs the same steps.
+size_t NDiff::InlineDiff(TVector>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims) {
+    if (delims.empty()) {
+        return InlineDiff(chunks, TConstArrayRef(left.data(), left.size()), TConstArrayRef(right.data(), right.size()));
+    }
+    TCollection c1(left, delims);
+    TCollection c2(right, delims);
+    TVector> diff; // chunks expressed as ranges of keys rather than text
+    const size_t dist = InlineDiff(diff, c1.GetKeys(), c2.GetKeys()); // NOTE(review): only hash keys are compared here, so two different pieces with equal hashes would be treated as identical
+    for (const auto& it : diff) {
+        chunks.push_back(TChunk(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common))); // Left and Common are remapped through c1 (left input), Right through c2 (right input)
+    }
+    return dist;
+}
--
cgit v1.3