diff options
| author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
|---|---|---|
| committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
| commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
| tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/diff/diff.cpp | |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/diff/diff.cpp')
| -rw-r--r-- | library/cpp/diff/diff.cpp | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/library/cpp/diff/diff.cpp b/library/cpp/diff/diff.cpp new file mode 100644 index 00000000000..be57da7f396 --- /dev/null +++ b/library/cpp/diff/diff.cpp @@ -0,0 +1,87 @@ +#include "diff.h" + +#include <util/generic/hash.h> +#include <util/digest/fnv.h> + +#include <iterator> + +template <typename T> +struct TCollectionImpl { + TVector<TConstArrayRef<T>> Words; + TVector<ui64> Keys; + + inline bool Consume(const T* b, const T* e, const T*) { + if (b < e) { + Words.push_back(TConstArrayRef<T>(b, e)); + Keys.push_back(FnvHash<ui64>((const char*)b, (e - b) * sizeof(T))); + } + return true; + } + + TConstArrayRef<T> Remap(const TConstArrayRef<ui64>& keys) const { + if (keys.empty()) { + return TConstArrayRef<T>(); + } + auto firstWordPos = std::distance(Keys.data(), keys.begin()); + auto lastWordPos = std::distance(Keys.data(), keys.end()) - 1; + Y_ASSERT(firstWordPos >= 0); + Y_ASSERT(lastWordPos >= firstWordPos); + Y_ASSERT(static_cast<size_t>(lastWordPos) < Words.size()); + + return TConstArrayRef<T>(Words[firstWordPos].begin(), Words[lastWordPos].end()); + } + + TConstArrayRef<ui64> GetKeys() const { + return TConstArrayRef<ui64>(Keys); + } +}; + +template <typename T> +struct TCollection { +}; + +template <> +struct TCollection<char>: public TCollectionImpl<char> { + TCollection(const TStringBuf& str, const TString& delims) { + TSetDelimiter<const char> set(delims.data()); + TKeepDelimiters<TCollection<char>> c(this); + SplitString(str.begin(), str.end(), set, c); + } +}; + +template <> +struct TCollection<wchar16>: public TCollectionImpl<wchar16> { + TCollection(const TWtringBuf& str, const TUtf16String& delims) { + TSetDelimiter<const wchar16> set(delims.data()); + TKeepDelimiters<TCollection<wchar16>> c(this); + SplitString(str.begin(), str.end(), set, c); + } +}; + +size_t NDiff::InlineDiff(TVector<TChunk<char>>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims) { + if (delims.empty()) { + return InlineDiff<char>(chunks, TConstArrayRef<char>(left.data(), left.size()), TConstArrayRef<char>(right.data(), right.size())); + } + TCollection<char> c1(left, delims); + TCollection<char> c2(right, delims); + TVector<TChunk<ui64>> diff; + const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys()); + for (const auto& it : diff) { + chunks.push_back(TChunk<char>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common))); + } + return dist; +} + +size_t NDiff::InlineDiff(TVector<TChunk<wchar16>>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims) { + if (delims.empty()) { + return InlineDiff<wchar16>(chunks, TConstArrayRef<wchar16>(left.data(), left.size()), TConstArrayRef<wchar16>(right.data(), right.size())); + } + TCollection<wchar16> c1(left, delims); + TCollection<wchar16> c2(right, delims); + TVector<TChunk<ui64>> diff; + const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys()); + for (const auto& it : diff) { + chunks.push_back(TChunk<wchar16>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common))); + } + return dist; +} |
