#pragma once
#include <library/cpp/lcs/lcs_via_lis.h>
#include <util/generic/algorithm.h>
#include <util/generic/array_ref.h>
#include <util/generic/strbuf.h>
#include <util/generic/vector.h>
#include <util/stream/output.h>
#include <util/string/split.h>
namespace NDiff {
template <typename T>
struct TChunk {
TConstArrayRef<T> Left;
TConstArrayRef<T> Right;
TConstArrayRef<T> Common;
TChunk() = default;
TChunk(const TConstArrayRef<T>& left, const TConstArrayRef<T>& right, const TConstArrayRef<T>& common)
: Left(left)
, Right(right)
, Common(common)
{
}
};
template <typename T>
size_t InlineDiff(TVector<TChunk<T>>& chunks, const TConstArrayRef<T>& left, const TConstArrayRef<T>& right) {
TConstArrayRef<T> s1(left);
TConstArrayRef<T> s2(right);
bool swapped = false;
if (s1.size() < s2.size()) {
// NLCS will silently swap strings if second string is longer
// So we swap strings here and remember the fact since it is crucial to diff
DoSwap(s1, s2);
swapped = true;
}
TVector<T> lcs;
NLCS::TLCSCtx<T> ctx;
NLCS::MakeLCS<T>(s1, s2, &lcs, &ctx);
// Start points of current common and diff parts
const T* c1 = nullptr;
const T* c2 = nullptr;
const T* d1 = s1.begin();
const T* d2 = s2.begin();
// End points of current common parts
const T* e1 = s1.begin();
const T* e2 = s2.begin();
size_t dist = s1.size() - lcs.size();
const size_t n = ctx.ResultBuffer.size();
for (size_t i = 0; i <= n && (e1 != s1.end() || e2 != s2.end());) {
if (i < n) {
// Common character exists
// LCS is marked against positions in s2
// Save the beginning of common part in s2
c2 = s2.begin() + ctx.ResultBuffer[i];
// Find the beginning of common part in s1
c1 = Find(e1, s1.end(), *c2);
// Follow common substring
for (e1 = c1, e2 = c2; i < n && *e1 == *e2; ++e1, ++e2) {
++i;
}
} else {
// No common character, common part is empty
c1 = s1.end();
c2 = s2.end();
e1 = s1.end();
e2 = s2.end();
}
TChunk<T> chunk(TConstArrayRef<T>(d1, c1), TConstArrayRef<T>(d2, c2), TConstArrayRef<T>(c1, e1));
if (swapped) {
DoSwap(chunk.Left, chunk.Right);
chunk.Common = TConstArrayRef<T>(c2, e2);
}
chunks.push_back(chunk);
d1 = e1;
d2 = e2;
}
return dist;
}
template <typename TFormatter, typename T>
void PrintChunks(IOutputStream& out, const TFormatter& fmt, const TVector<TChunk<T>>& chunks) {
for (typename TVector<TChunk<T>>::const_iterator chunk = chunks.begin(); chunk != chunks.end(); ++chunk) {
if (!chunk->Left.empty() || !chunk->Right.empty()) {
out << fmt.Special("(");
out << fmt.Left(chunk->Left);
out << fmt.Special("|");
out << fmt.Right(chunk->Right);
out << fmt.Special(")");
}
out << fmt.Common(chunk->Common);
}
}
// Without delimiters calculates character-wise diff
// With delimiters calculates token-wise diff
size_t InlineDiff(TVector<TChunk<char>>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims = TString());
size_t InlineDiff(TVector<TChunk<wchar16>>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims = TUtf16String());
}