diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h')
-rw-r--r-- | library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h | 176 |
1 files changed, 88 insertions, 88 deletions
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h index 8a240bfed8..e3d3f5247e 100644 --- a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h +++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h @@ -9,34 +9,34 @@ #include <utility> namespace NLevenshtein { - enum EEditMoveType { - EMT_SPECIAL, - EMT_PRESERVE, - EMT_REPLACE, - EMT_DELETE, - EMT_INSERT - }; - - inline bool IsImportantEditMove(EEditMoveType p) { - return (p != EMT_SPECIAL && p != EMT_PRESERVE); - } - - inline void MakeMove(EEditMoveType t, int& p1, int& p2) { - switch (t) { - case EMT_PRESERVE: - case EMT_REPLACE: - p1++; - p2++; - break; - case EMT_DELETE: - p1++; - break; - case EMT_INSERT: - p2++; - break; - default: - break; - } + enum EEditMoveType { + EMT_SPECIAL, + EMT_PRESERVE, + EMT_REPLACE, + EMT_DELETE, + EMT_INSERT + }; + + inline bool IsImportantEditMove(EEditMoveType p) { + return (p != EMT_SPECIAL && p != EMT_PRESERVE); + } + + inline void MakeMove(EEditMoveType t, int& p1, int& p2) { + switch (t) { + case EMT_PRESERVE: + case EMT_REPLACE: + p1++; + p2++; + break; + case EMT_DELETE: + p1++; + break; + case EMT_INSERT: + p2++; + break; + default: + break; + } } using TEditChain = TVector<EEditMoveType>; @@ -58,7 +58,7 @@ namespace NLevenshtein { template <typename TStringType> using TCharType = typename std::decay_t<decltype(std::add_const_t<TStringType>()[0])>; - /// Finds sequence of "edit moves" for two strings + /// Finds sequence of "edit moves" for two strings template <class TStringType, class TWeightType = int, class TReplaceWeigher = TWeightOneBinaryGetter<TCharType<TStringType>>, class TDeleteWeigher = TWeightOneUnaryGetter<TCharType<TStringType>>, @@ -69,20 +69,20 @@ namespace NLevenshtein { const TDeleteWeigher& deleteWeigher = TDeleteWeigher(), const TInsertWeigher& insertWeigher = TInsertWeigher()) { - int l1 = (int)str1.size(); - int l2 = (int)str2.size(); + int l1 = (int)str1.size(); + int l2 = (int)str2.size(); TMatrix<std::pair<TWeightType, EEditMoveType>> ma(l1 + 1, l2 + 1); /// ma[i][j].first = diff(str1[0..i-1], str2[0..j-1]) - ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point - for (int i = 1; i <= l1; i++) { + ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point + for (int i = 1; i <= l1; i++) { ma[i][0] = std::make_pair(ma[i - 1][0].first + deleteWeigher(str1[i - 1]), EMT_DELETE); } - for (int i = 1; i <= l2; i++) { + for (int i = 1; i <= l2; i++) { ma[0][i] = std::make_pair(ma[0][i - 1].first + insertWeigher(str2[i - 1]), EMT_INSERT); } - // Here goes basic Levestein's algorithm - for (int i = 1; i <= l1; i++) { - for (int j = 1; j <= l2; j++) { + // Here goes basic Levestein's algorithm + for (int i = 1; i <= l1; i++) { + for (int j = 1; j <= l2; j++) { if (str1[i - 1] == str2[j - 1]) { ma[i][j] = std::make_pair(ma[i - 1][j - 1].first, EMT_PRESERVE); } else { @@ -108,31 +108,31 @@ namespace NLevenshtein { ma[i][j] = std::make_pair(insertPathWeight, EMT_INSERT); } } - } - } - // Tracing the path from final point - res.clear(); + } + } + // Tracing the path from final point + res.clear(); res.reserve(Max<size_t>(l1, l2)); - for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) { - res.push_back(ma[i][j].second); - switch (ma[i][j].second) { - case EMT_PRESERVE: - case EMT_REPLACE: - --i; - --j; - break; - case EMT_DELETE: - --i; - break; - case EMT_INSERT: - --j; - break; - default: - // TODO: throw exception - break; - } - } - std::reverse(res.begin(), res.end()); + for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) { + res.push_back(ma[i][j].second); + switch (ma[i][j].second) { + case EMT_PRESERVE: + case EMT_REPLACE: + --i; + --j; + break; + case EMT_DELETE: + --i; + break; + case EMT_INSERT: + --j; + break; + default: + // TODO: throw exception + break; + } + } + std::reverse(res.begin(), res.end()); if (weight != nullptr) { *weight = ma[l1][l2].first; @@ -151,9 +151,9 @@ namespace NLevenshtein { return result; } - /// Calculates substrings to be replaced for str1->str2 transformation - struct TReplacement { - int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength; + /// Calculates substrings to be replaced for str1->str2 transformation + struct TReplacement { + int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength; TReplacement() : CorrectOffset(0) , CorrectLength(0) @@ -161,32 +161,32 @@ namespace NLevenshtein { , MisspelledLength(0) { } - TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength) - : CorrectOffset(correctOffset) - , CorrectLength(correctLength) - , MisspelledOffset(misspelledOffset) - , MisspelledLength(misspelledLength) - { - } - }; + TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength) + : CorrectOffset(correctOffset) + , CorrectLength(correctLength) + , MisspelledOffset(misspelledOffset) + , MisspelledLength(misspelledLength) + { + } + }; template <class TStringType> void GetStringReplacements(const TStringType& str1, const TStringType& str2, TVector<TReplacement>& res) { - TEditChain editChain; - GetEditChain(str1, str2, editChain); - editChain.push_back(EMT_SPECIAL); - int c1 = 0, c2 = 0; - res.clear(); - for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) { - if (IsImportantEditMove(*it)) { - int sc1 = c1, sc2 = c2; - do { - MakeMove(*it, c1, c2); - ++it; - } while (IsImportantEditMove(*it)); - res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2)); - } - MakeMove(*it, c1, c2); - } + TEditChain editChain; + GetEditChain(str1, str2, editChain); + editChain.push_back(EMT_SPECIAL); + int c1 = 0, c2 = 0; + res.clear(); + for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) { + if (IsImportantEditMove(*it)) { + int sc1 = c1, sc2 = c2; + do { + MakeMove(*it, c1, c2); + ++it; + } while (IsImportantEditMove(*it)); + res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2)); + } + MakeMove(*it, c1, c2); + } } } |