aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/deprecated/kmp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/deprecated/kmp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/deprecated/kmp')
-rw-r--r--library/cpp/deprecated/kmp/kmp.cpp21
-rw-r--r--library/cpp/deprecated/kmp/kmp.h108
-rw-r--r--library/cpp/deprecated/kmp/kmp_ut.cpp80
-rw-r--r--library/cpp/deprecated/kmp/ut/ya.make9
-rw-r--r--library/cpp/deprecated/kmp/ya.make10
5 files changed, 228 insertions, 0 deletions
diff --git a/library/cpp/deprecated/kmp/kmp.cpp b/library/cpp/deprecated/kmp/kmp.cpp
new file mode 100644
index 0000000000..d02074c94a
--- /dev/null
+++ b/library/cpp/deprecated/kmp/kmp.cpp
@@ -0,0 +1,21 @@
+#include "kmp.h"
+
+#include <util/generic/yexception.h>
+
+TKMPMatcher::TKMPMatcher(const char* patternBegin, const char* patternEnd)
+ : Pattern(patternBegin, patternEnd)
+{
+ ComputePrefixFunction();
+}
+
+TKMPMatcher::TKMPMatcher(const TString& pattern)
+ : Pattern(pattern)
+{
+ ComputePrefixFunction();
+}
+
+void TKMPMatcher::ComputePrefixFunction() {
+ ssize_t* pf;
+ ::ComputePrefixFunction(Pattern.data(), Pattern.data() + Pattern.size(), &pf);
+ PrefixFunction.Reset(pf);
+}
diff --git a/library/cpp/deprecated/kmp/kmp.h b/library/cpp/deprecated/kmp/kmp.h
new file mode 100644
index 0000000000..a7f72eece6
--- /dev/null
+++ b/library/cpp/deprecated/kmp/kmp.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+
+template <typename T>
+void ComputePrefixFunction(const T* begin, const T* end, ssize_t** result) {
+ Y_ENSURE(begin != end, TStringBuf("empty pattern"));
+ ssize_t len = end - begin;
+ TArrayHolder<ssize_t> resultHolder(new ssize_t[len + 1]);
+ ssize_t i = 0;
+ ssize_t j = -1;
+ resultHolder[0] = -1;
+ while (i < len) {
+ while ((j >= 0) && (begin[j] != begin[i]))
+ j = resultHolder[j];
+ ++i;
+ ++j;
+ Y_ASSERT(i >= 0);
+ Y_ASSERT(j >= 0);
+ Y_ASSERT(j < len);
+ if ((i < len) && (begin[i] == begin[j]))
+ resultHolder[i] = resultHolder[j];
+ else
+ resultHolder[i] = j;
+ }
+ *result = resultHolder.Release();
+}
+
+class TKMPMatcher {
+private:
+ TArrayHolder<ssize_t> PrefixFunction;
+ TString Pattern;
+
+ void ComputePrefixFunction();
+
+public:
+ TKMPMatcher(const char* patternBegin, const char* patternEnd);
+ TKMPMatcher(const TString& pattern);
+
+ bool SubStr(const char* begin, const char* end, const char*& result) const {
+ Y_ASSERT(begin <= end);
+ ssize_t m = Pattern.size();
+ ssize_t n = end - begin;
+ ssize_t i, j;
+ for (i = 0, j = 0; (i < n) && (j < m); ++i, ++j) {
+ while ((j >= 0) && (Pattern[j] != begin[i]))
+ j = PrefixFunction[j];
+ }
+ if (j == m) {
+ result = begin + i - m;
+ return true;
+ } else {
+ return false;
+ }
+ }
+};
+
+template <typename T>
+class TKMPStreamMatcher {
+public:
+ class ICallback {
+ public:
+ virtual void OnMatch(const T* begin, const T* end) = 0;
+ virtual ~ICallback() = default;
+ };
+
+private:
+ ICallback* Callback;
+ TArrayHolder<ssize_t> PrefixFunction;
+ using TTVector = TVector<T>;
+ TTVector Pattern;
+ ssize_t State;
+ TTVector Candidate;
+
+public:
+ TKMPStreamMatcher(const T* patternBegin, const T* patternEnd, ICallback* callback)
+ : Callback(callback)
+ , Pattern(patternBegin, patternEnd)
+ , State(0)
+ , Candidate(Pattern.size())
+ {
+ ssize_t* pf;
+ ComputePrefixFunction(patternBegin, patternEnd, &pf);
+ PrefixFunction.Reset(pf);
+ }
+
+ void Push(const T& symbol) {
+ while ((State >= 0) && (Pattern[State] != symbol)) {
+ Y_ASSERT(State <= (ssize_t) Pattern.size());
+ State = PrefixFunction[State];
+ Y_ASSERT(State <= (ssize_t) Pattern.size());
+ }
+ if (State >= 0)
+ Candidate[State] = symbol;
+ ++State;
+ if (State == (ssize_t) Pattern.size()) {
+ Callback->OnMatch(Candidate.begin(), Candidate.end());
+ State = 0;
+ }
+ }
+
+ void Clear() {
+ State = 0;
+ }
+};
diff --git a/library/cpp/deprecated/kmp/kmp_ut.cpp b/library/cpp/deprecated/kmp/kmp_ut.cpp
new file mode 100644
index 0000000000..c2eda83c57
--- /dev/null
+++ b/library/cpp/deprecated/kmp/kmp_ut.cpp
@@ -0,0 +1,80 @@
+#include "kmp.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/output.h>
+
+static TVector<int> FindAll(const TString& pattern, const TString& string) {
+ TVector<int> result;
+ TKMPMatcher kmp(pattern);
+ const char* pResult;
+ const char* begin = string.begin();
+ const char* end = string.end();
+ while (kmp.SubStr(begin, end, pResult)) {
+ result.push_back(int(pResult - string.data()));
+ begin = pResult + pattern.size();
+ }
+ return result;
+}
+
+class TTestKMP: public TTestBase {
+ UNIT_TEST_SUITE(TTestKMP);
+ UNIT_TEST(Test);
+ UNIT_TEST(TestStream);
+ UNIT_TEST_SUITE_END();
+
+public:
+ void Test() {
+ TVector<int> ans = {0, 2};
+ UNIT_ASSERT_EQUAL(FindAll("a", "aba"), ans);
+ ans = {0};
+ UNIT_ASSERT_EQUAL(FindAll("aba", "aba"), ans);
+ ans.clear();
+ UNIT_ASSERT_EQUAL(FindAll("abad", "aba"), ans);
+ ans = {0, 2};
+ UNIT_ASSERT_EQUAL(FindAll("ab", "abab"), ans);
+ }
+
+ class TKMPSimpleCallback: public TKMPStreamMatcher<int>::ICallback {
+ private:
+ int* Begin;
+ int* End;
+ int Count;
+
+ public:
+ TKMPSimpleCallback(int* begin, int* end)
+ : Begin(begin)
+ , End(end)
+ , Count(0)
+ {
+ }
+
+ void OnMatch(const int* begin, const int* end) override {
+ UNIT_ASSERT_EQUAL(end - begin, End - Begin);
+ const int* p0 = Begin;
+ const int* p1 = begin;
+ while (p0 < End) {
+ UNIT_ASSERT_EQUAL(*p0, *p1);
+ ++p0;
+ ++p1;
+ }
+ ++Count;
+ }
+
+ int GetCount() const {
+ return Count;
+ }
+ };
+
+ void TestStream() {
+ int pattern[] = {2, 3};
+ int data[] = {1, 2, 3, 5, 2, 2, 3, 2, 4, 3, 2};
+ TKMPSimpleCallback callback(pattern, pattern + 2);
+ TKMPStreamMatcher<int> matcher(pattern, pattern + 2, &callback);
+ for (auto& i : data)
+ matcher.Push(i);
+ UNIT_ASSERT_EQUAL(2, callback.GetCount());
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TTestKMP);
diff --git a/library/cpp/deprecated/kmp/ut/ya.make b/library/cpp/deprecated/kmp/ut/ya.make
new file mode 100644
index 0000000000..9c54ee2715
--- /dev/null
+++ b/library/cpp/deprecated/kmp/ut/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/deprecated/kmp)
+
+OWNER(g:util)
+
+SRCS(
+ kmp_ut.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/kmp/ya.make b/library/cpp/deprecated/kmp/ya.make
new file mode 100644
index 0000000000..7c1c557934
--- /dev/null
+++ b/library/cpp/deprecated/kmp/ya.make
@@ -0,0 +1,10 @@
+LIBRARY()
+
+OWNER(g:util)
+
+SRCS(
+ kmp.cpp
+ kmp.h
+)
+
+END()