// Source: root/library/cpp/deprecated/kmp/kmp.h
// Blob: ce7783e2fccdd2a24c107d004e68d1c00210097a
#pragma once

#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>

template <typename T>
void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) {
    Y_ENSURE(begin != end, TStringBuf("empty pattern"));
    ssize_t len = end - begin;
    TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]);
    ssize_t i = 0;
    ssize_t j = -1;
    resultHolder[0] = -1;
    while (i < len) {
        while ((j >= 0) && (begin[j] != begin[i]))
            j = resultHolder[j];
        ++i;
        ++j;
        Y_ASSERT(i >= 0); 
        Y_ASSERT(j >= 0); 
        Y_ASSERT(j < len); 
        if ((i < len) && (begin[i] == begin[j]))
            resultHolder[i] = resultHolder[j];
        else
            resultHolder[i] = j;
    }
    *result = resultHolder.Release();
}

// Substring search over char ranges using a stored pattern and its
// precomputed fallback table.
class TKMPMatcher {
private:
    // Fallback table indexed by pattern position; filled outside this header.
    TArrayHolder<ssize_t> PrefixFunction;
    // The pattern being searched for.
    TString Pattern;

    // Defined out of line (not visible in this header).
    void ComputePrefixFunction();

public:
    // Both constructors are defined out of line (not visible in this header).
    TKMPMatcher(const char* patternBegin, const char* patternEnd);
    TKMPMatcher(const TString& pattern);

    // Scans [begin, end) from left to right for Pattern.
    //
    // Returns true and stores the address of the first symbol of the match in
    // `result` as soon as the whole pattern has been matched; returns false
    // and leaves `result` untouched if the text ends first.
    bool SubStr(const char* begin, const char* end, const char*& result) const {
        Y_ASSERT(begin <= end);

        const ssize_t patternLen = Pattern.size();
        const ssize_t textLen = end - begin;

        ssize_t textPos = 0;
        ssize_t matched = 0;
        while ((textPos < textLen) && (matched < patternLen)) {
            // On mismatch follow the fallback table; -1 means the current text
            // symbol cannot continue any prefix of the pattern.
            while ((matched >= 0) && (Pattern[matched] != begin[textPos])) {
                matched = PrefixFunction[matched];
            }
            ++textPos;
            ++matched;
        }

        if (matched != patternLen) {
            return false;
        }
        // textPos is one past the last matched symbol, so step back by the
        // pattern length to get the start of the occurrence.
        result = begin + textPos - patternLen;
        return true;
    }
};

// Incremental matcher: symbols are fed one at a time through Push(), and the
// callback is invoked each time the whole pattern has just been matched.
template <typename T>
class TKMPStreamMatcher {
public:
    // Receiver of match notifications.
    class ICallback {
    public:
        // Called with the range of buffered symbols that formed the match.
        virtual void OnMatch(const T* begin, const T* end) = 0;
        virtual ~ICallback() = default;
    };

private:
    // Stored as a raw pointer; this class never deletes it.
    ICallback* Callback;
    // Fallback table produced by the free function ComputePrefixFunction.
    TArrayHolder<ssize_t> PrefixFunction;
    using TTVector = TVector<T>;
    // Private copy of the pattern.
    TTVector Pattern;
    // Number of pattern symbols matched so far (index of the next one to compare).
    ssize_t State;
    // Buffer of pushed symbols for the match in progress; same size as Pattern.
    TTVector Candidate;

public:
    // Copies the pattern [patternBegin, patternEnd), builds its fallback table
    // and remembers `callback` for later notifications.
    TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback)
        : Callback(callback)
        , Pattern(patternBegin, patternEnd)
        , State(0)
        , Candidate(Pattern.size())
    {
        ssize_t* table = nullptr;
        ComputePrefixFunction(patternBegin, patternEnd, &table);
        // Take ownership of the array handed back through the out-parameter.
        PrefixFunction.Reset(table);
    }

    // Feeds the next symbol of the stream into the matcher.
    void Push(const T& symbol) {
        const ssize_t patternLen = static_cast<ssize_t>(Pattern.size());

        // Follow the fallback table until the symbol fits or State drops to -1.
        while ((State >= 0) && (Pattern[State] != symbol)) {
            Y_ASSERT(State <= patternLen);
            State = PrefixFunction[State];
            Y_ASSERT(State <= patternLen);
        }

        // State == -1 means the symbol matches nothing and is not buffered.
        if (State >= 0) {
            Candidate[State] = symbol;
        }
        ++State;

        if (State != patternLen) {
            return;
        }
        // Full pattern matched: notify first, then restart matching from the
        // beginning of the pattern.
        Callback->OnMatch(Candidate.begin(), Candidate.end());
        State = 0;
    }

    // Forgets any partial match; the next Push() starts from the pattern start.
    void Clear() {
        State = 0;
    }
};