aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/diff/diff.cpp
blob: be57da7f396cf5cb4ff6a4f05a4179bd433040eb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include "diff.h"

#include <util/generic/hash.h>
#include <util/digest/fnv.h>

#include <iterator>

template <typename T>
struct TCollectionImpl {
    TVector<TConstArrayRef<T>> Words;
    TVector<ui64> Keys;

    inline bool Consume(const T* b, const T* e, const T*) {
        if (b < e) {
            Words.push_back(TConstArrayRef<T>(b, e));
            Keys.push_back(FnvHash<ui64>((const char*)b, (e - b) * sizeof(T)));
        }
        return true;
    }

    TConstArrayRef<T> Remap(const TConstArrayRef<ui64>& keys) const {
        if (keys.empty()) {
            return TConstArrayRef<T>();
        }
        auto firstWordPos = std::distance(Keys.data(), keys.begin());
        auto lastWordPos = std::distance(Keys.data(), keys.end()) - 1;
        Y_ASSERT(firstWordPos >= 0);
        Y_ASSERT(lastWordPos >= firstWordPos);
        Y_ASSERT(static_cast<size_t>(lastWordPos) < Words.size());

        return TConstArrayRef<T>(Words[firstWordPos].begin(), Words[lastWordPos].end());
    }

    TConstArrayRef<ui64> GetKeys() const {
        return TConstArrayRef<ui64>(Keys);
    }
};

template <typename T>
struct TCollection {
};

template <>
struct TCollection<char>: public TCollectionImpl<char> {
    TCollection(const TStringBuf& str, const TString& delims) {
        TSetDelimiter<const char> set(delims.data());
        TKeepDelimiters<TCollection<char>> c(this);
        SplitString(str.begin(), str.end(), set, c);
    }
};

template <>
struct TCollection<wchar16>: public TCollectionImpl<wchar16> {
    TCollection(const TWtringBuf& str, const TUtf16String& delims) {
        TSetDelimiter<const wchar16> set(delims.data());
        TKeepDelimiters<TCollection<wchar16>> c(this);
        SplitString(str.begin(), str.end(), set, c);
    }
};

size_t NDiff::InlineDiff(TVector<TChunk<char>>& chunks, const TStringBuf& left, const TStringBuf& right, const TString& delims) {
    if (delims.empty()) {
        return InlineDiff<char>(chunks, TConstArrayRef<char>(left.data(), left.size()), TConstArrayRef<char>(right.data(), right.size()));
    }
    TCollection<char> c1(left, delims);
    TCollection<char> c2(right, delims);
    TVector<TChunk<ui64>> diff;
    const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys());
    for (const auto& it : diff) {
        chunks.push_back(TChunk<char>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common)));
    }
    return dist;
}

size_t NDiff::InlineDiff(TVector<TChunk<wchar16>>& chunks, const TWtringBuf& left, const TWtringBuf& right, const TUtf16String& delims) {
    if (delims.empty()) {
        return InlineDiff<wchar16>(chunks, TConstArrayRef<wchar16>(left.data(), left.size()), TConstArrayRef<wchar16>(right.data(), right.size()));
    }
    TCollection<wchar16> c1(left, delims);
    TCollection<wchar16> c2(right, delims);
    TVector<TChunk<ui64>> diff;
    const size_t dist = InlineDiff<ui64>(diff, c1.GetKeys(), c2.GetKeys());
    for (const auto& it : diff) {
        chunks.push_back(TChunk<wchar16>(c1.Remap(it.Left), c2.Remap(it.Right), c1.Remap(it.Common)));
    }
    return dist;
}