aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:15 +0300
commit72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
treeda2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
parent778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
downloadydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h')
-rw-r--r--library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h176
1 files changed, 88 insertions, 88 deletions
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
index 8a240bfed8..e3d3f5247e 100644
--- a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
+++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
@@ -9,34 +9,34 @@
#include <utility>
namespace NLevenshtein {
- enum EEditMoveType {
- EMT_SPECIAL,
- EMT_PRESERVE,
- EMT_REPLACE,
- EMT_DELETE,
- EMT_INSERT
- };
-
- inline bool IsImportantEditMove(EEditMoveType p) {
- return (p != EMT_SPECIAL && p != EMT_PRESERVE);
- }
-
- inline void MakeMove(EEditMoveType t, int& p1, int& p2) {
- switch (t) {
- case EMT_PRESERVE:
- case EMT_REPLACE:
- p1++;
- p2++;
- break;
- case EMT_DELETE:
- p1++;
- break;
- case EMT_INSERT:
- p2++;
- break;
- default:
- break;
- }
+ enum EEditMoveType {
+ EMT_SPECIAL,
+ EMT_PRESERVE,
+ EMT_REPLACE,
+ EMT_DELETE,
+ EMT_INSERT
+ };
+
+ inline bool IsImportantEditMove(EEditMoveType p) {
+ return (p != EMT_SPECIAL && p != EMT_PRESERVE);
+ }
+
+ inline void MakeMove(EEditMoveType t, int& p1, int& p2) {
+ switch (t) {
+ case EMT_PRESERVE:
+ case EMT_REPLACE:
+ p1++;
+ p2++;
+ break;
+ case EMT_DELETE:
+ p1++;
+ break;
+ case EMT_INSERT:
+ p2++;
+ break;
+ default:
+ break;
+ }
}
using TEditChain = TVector<EEditMoveType>;
@@ -58,7 +58,7 @@ namespace NLevenshtein {
template <typename TStringType>
using TCharType = typename std::decay_t<decltype(std::add_const_t<TStringType>()[0])>;
- /// Finds sequence of "edit moves" for two strings
+ /// Finds sequence of "edit moves" for two strings
template <class TStringType, class TWeightType = int,
class TReplaceWeigher = TWeightOneBinaryGetter<TCharType<TStringType>>,
class TDeleteWeigher = TWeightOneUnaryGetter<TCharType<TStringType>>,
@@ -69,20 +69,20 @@ namespace NLevenshtein {
const TDeleteWeigher& deleteWeigher = TDeleteWeigher(),
const TInsertWeigher& insertWeigher = TInsertWeigher())
{
- int l1 = (int)str1.size();
- int l2 = (int)str2.size();
+ int l1 = (int)str1.size();
+ int l2 = (int)str2.size();
TMatrix<std::pair<TWeightType, EEditMoveType>> ma(l1 + 1, l2 + 1); /// ma[i][j].first = diff(str1[0..i-1], str2[0..j-1])
- ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point
- for (int i = 1; i <= l1; i++) {
+ ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point
+ for (int i = 1; i <= l1; i++) {
ma[i][0] = std::make_pair(ma[i - 1][0].first + deleteWeigher(str1[i - 1]), EMT_DELETE);
}
- for (int i = 1; i <= l2; i++) {
+ for (int i = 1; i <= l2; i++) {
ma[0][i] = std::make_pair(ma[0][i - 1].first + insertWeigher(str2[i - 1]), EMT_INSERT);
}
- // Here goes basic Levestein's algorithm
- for (int i = 1; i <= l1; i++) {
- for (int j = 1; j <= l2; j++) {
+ // Here goes basic Levestein's algorithm
+ for (int i = 1; i <= l1; i++) {
+ for (int j = 1; j <= l2; j++) {
if (str1[i - 1] == str2[j - 1]) {
ma[i][j] = std::make_pair(ma[i - 1][j - 1].first, EMT_PRESERVE);
} else {
@@ -108,31 +108,31 @@ namespace NLevenshtein {
ma[i][j] = std::make_pair(insertPathWeight, EMT_INSERT);
}
}
- }
- }
- // Tracing the path from final point
- res.clear();
+ }
+ }
+ // Tracing the path from final point
+ res.clear();
res.reserve(Max<size_t>(l1, l2));
- for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) {
- res.push_back(ma[i][j].second);
- switch (ma[i][j].second) {
- case EMT_PRESERVE:
- case EMT_REPLACE:
- --i;
- --j;
- break;
- case EMT_DELETE:
- --i;
- break;
- case EMT_INSERT:
- --j;
- break;
- default:
- // TODO: throw exception
- break;
- }
- }
- std::reverse(res.begin(), res.end());
+ for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) {
+ res.push_back(ma[i][j].second);
+ switch (ma[i][j].second) {
+ case EMT_PRESERVE:
+ case EMT_REPLACE:
+ --i;
+ --j;
+ break;
+ case EMT_DELETE:
+ --i;
+ break;
+ case EMT_INSERT:
+ --j;
+ break;
+ default:
+ // TODO: throw exception
+ break;
+ }
+ }
+ std::reverse(res.begin(), res.end());
if (weight != nullptr) {
*weight = ma[l1][l2].first;
@@ -151,9 +151,9 @@ namespace NLevenshtein {
return result;
}
- /// Calculates substrings to be replaced for str1->str2 transformation
- struct TReplacement {
- int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength;
+ /// Calculates substrings to be replaced for str1->str2 transformation
+ struct TReplacement {
+ int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength;
TReplacement()
: CorrectOffset(0)
, CorrectLength(0)
@@ -161,32 +161,32 @@ namespace NLevenshtein {
, MisspelledLength(0)
{
}
- TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength)
- : CorrectOffset(correctOffset)
- , CorrectLength(correctLength)
- , MisspelledOffset(misspelledOffset)
- , MisspelledLength(misspelledLength)
- {
- }
- };
+ TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength)
+ : CorrectOffset(correctOffset)
+ , CorrectLength(correctLength)
+ , MisspelledOffset(misspelledOffset)
+ , MisspelledLength(misspelledLength)
+ {
+ }
+ };
template <class TStringType>
void GetStringReplacements(const TStringType& str1, const TStringType& str2, TVector<TReplacement>& res) {
- TEditChain editChain;
- GetEditChain(str1, str2, editChain);
- editChain.push_back(EMT_SPECIAL);
- int c1 = 0, c2 = 0;
- res.clear();
- for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) {
- if (IsImportantEditMove(*it)) {
- int sc1 = c1, sc2 = c2;
- do {
- MakeMove(*it, c1, c2);
- ++it;
- } while (IsImportantEditMove(*it));
- res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2));
- }
- MakeMove(*it, c1, c2);
- }
+ TEditChain editChain;
+ GetEditChain(str1, str2, editChain);
+ editChain.push_back(EMT_SPECIAL);
+ int c1 = 0, c2 = 0;
+ res.clear();
+ for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) {
+ if (IsImportantEditMove(*it)) {
+ int sc1 = c1, sc2 = c2;
+ do {
+ MakeMove(*it, c1, c2);
+ ++it;
+ } while (IsImportantEditMove(*it));
+ res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2));
+ }
+ MakeMove(*it, c1, c2);
+ }
}
}