1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#pragma once
#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
template <typename T>
void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) {
Y_ENSURE(begin != end, TStringBuf("empty pattern"));
ssize_t len = end - begin;
TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]);
ssize_t i = 0;
ssize_t j = -1;
resultHolder[0] = -1;
while (i < len) {
while ((j >= 0) && (begin[j] != begin[i]))
j = resultHolder[j];
++i;
++j;
Y_ASSERT(i >= 0);
Y_ASSERT(j >= 0);
Y_ASSERT(j < len);
if ((i < len) && (begin[i] == begin[j]))
resultHolder[i] = resultHolder[j];
else
resultHolder[i] = j;
}
*result = resultHolder.Release();
}
/// Substring search for a fixed pattern using a precomputed failure table.
///
/// The constructors and the private ComputePrefixFunction() are only declared
/// here; their definitions are not part of this header.
/// NOTE(review): presumably they fill PrefixFunction for Pattern - confirm
/// against the implementation file.
class TKMPMatcher {
private:
    TArrayHolder<ssize_t> PrefixFunction;
    TString Pattern;

    void ComputePrefixFunction();

public:
    TKMPMatcher(const char* patternBegin, const char* patternEnd);
    TKMPMatcher(const TString& pattern);

    /// Searches [begin, end) for Pattern.
    ///
    /// @param begin   start of the text to scan
    /// @param end     one past the end of the text to scan
    /// @param result  set to the start of the match when one is found;
    ///                left untouched otherwise
    /// @return true when the whole pattern was matched, false otherwise
    bool SubStr(const char* begin, const char* end, const char*& result) const {
        Y_ASSERT(begin <= end);

        const ssize_t patternLen = Pattern.size();
        const ssize_t textLen = end - begin;

        ssize_t textPos = 0;
        ssize_t matched = 0;
        // The scan stops as soon as the whole pattern is matched or the text ends.
        while ((textPos < textLen) && (matched < patternLen)) {
            // On a mismatch follow the failure table; -1 means "restart the
            // pattern at the next text symbol".
            while ((matched >= 0) && (Pattern[matched] != begin[textPos])) {
                matched = PrefixFunction[matched];
            }
            ++textPos;
            ++matched;
        }

        if (matched != patternLen) {
            return false;
        }
        // textPos is one past the last matched symbol.
        result = begin + textPos - patternLen;
        return true;
    }
};
/// KMP matcher that consumes its input one symbol at a time and reports every
/// completed match of a fixed pattern through a callback.
///
/// After a match the automaton restarts from the beginning of the pattern, so
/// reported matches do not overlap.
template <typename T>
class TKMPStreamMatcher {
public:
    /// Receiver of match notifications.
    class ICallback {
    public:
        /// Called from Push() each time the whole pattern has been matched.
        /// [begin, end) points into the matcher's internal buffer, which later
        /// Push() calls overwrite; copy the data if it must outlive the call.
        virtual void OnMatch(const T* begin, const T* end) = 0;
        virtual ~ICallback() = default;
    };

private:
    ICallback* Callback; // stored as a raw pointer; nothing in this class deletes it
    TArrayHolder<ssize_t> PrefixFunction;
    using TTVector = TVector<T>;
    TTVector Pattern;
    ssize_t State; // number of pattern symbols matched so far; -1 only transiently inside Push()
    TTVector Candidate; // symbols accepted for the match currently in progress

public:
    /// @param patternBegin  first symbol of the pattern
    /// @param patternEnd    one past the last symbol of the pattern
    /// @param callback      notified on every match; dereferenced in Push()
    ///
    /// The failure table is built with ComputePrefixFunction(), which rejects
    /// an empty pattern.
    TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback)
        : Callback(callback)
        , Pattern(patternBegin, patternEnd)
        , State(0)
        , Candidate(Pattern.size())
    {
        ssize_t* table = nullptr;
        ComputePrefixFunction(patternBegin, patternEnd, &table);
        PrefixFunction.Reset(table);
    }

    /// Feeds the next input symbol; invokes Callback->OnMatch() when this
    /// symbol completes a match.
    void Push(const T& symbol) {
        const ssize_t patternLen = static_cast<ssize_t>(Pattern.size());

        // Follow the failure table until the symbol extends the current
        // partial match or no partial match is left (State == -1).
        while ((State >= 0) && (Pattern[State] != symbol)) {
            Y_ASSERT(State <= patternLen);
            State = PrefixFunction[State];
            Y_ASSERT(State <= patternLen);
        }
        if (State >= 0) {
            Candidate[State] = symbol;
        }
        ++State;

        if (State == patternLen) {
            // Reset before notifying: if the callback throws or pushes more
            // symbols into this matcher, State must already be a valid index
            // into Pattern, otherwise the next Push() would read past its end.
            State = 0;
            Callback->OnMatch(Candidate.data(), Candidate.data() + Candidate.size());
        }
    }

    /// Discards any partial match; the next Push() starts from the beginning
    /// of the pattern.
    void Clear() {
        State = 0;
    }
};
|