aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvmordovin <vmordovin@yandex-team.com>2024-07-27 14:31:54 +0300
committervmordovin <vmordovin@yandex-team.com>2024-07-27 14:41:30 +0300
commit72c283db0b8f30e61ac1e4643ad530f7c027dac0 (patch)
tree364be474ebdbeb7da9ede834b2aa8b470aa0af1f
parent271bf6a41232dc7ad1f5e13fd33871400f27bd98 (diff)
downloadydb-72c283db0b8f30e61ac1e4643ad530f7c027dac0.tar.gz
Move code to library
Начал с того, что просто унес ```url.*``` из ```search/meta``` в ```search/meta/generic```. Закончилось тем, что еще и поотпиливал странные зависимости ```kernel/facts/credibility/database->search/meta``` и ```quality/functionality/facts/credibility/url_trie_map_lib->search/meta```: общие библиотеки не имеют права зависеть от ```search/meta/*``` c1e3dbc38eff7073949a371f5fc59714eeb9306e
-rw-r--r--library/cpp/string_utils/url/url.cpp34
-rw-r--r--library/cpp/string_utils/url/url.h6
2 files changed, 39 insertions, 1 deletions
diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp
index c850afa5a7..4db902cf65 100644
--- a/library/cpp/string_utils/url/url.cpp
+++ b/library/cpp/string_utils/url/url.cpp
@@ -71,6 +71,40 @@ namespace NUrl {
return {host, path};
}
+ // Reports whether every character of `url` that precedes the first '/' is
+ // non-uppercase. Returns true for an empty string. Note that for a URL which
+ // still carries a scheme such as "http://" the scan ends at the scheme's slash.
+ bool HasLowerHost(const TStringBuf &url) {
+     for (const char ch : url) {
+         if (ch == '/')
+             break;
+         // <cctype> classifiers are undefined for negative arguments, so a byte
+         // >= 0x80 (e.g. part of a UTF-8 host name) must go through unsigned char.
+         if (isupper(static_cast<unsigned char>(ch)))
+             return false;
+     }
+     return true;
+ }
+
+ // Applies CutHttpPrefix() and then CutWWWPrefix() to `url`, and additionally
+ // drops a single trailing '/' if the remaining view ends with one.
+ // The result is a view into the caller's data; nothing is copied.
+ TStringBuf CutHttpWwwPrefixes(const TStringBuf &url) {
+     const TStringBuf withoutScheme = CutHttpPrefix(url);
+     TStringBuf stripped = CutWWWPrefix(withoutScheme);
+     const bool endsWithSlash = !stripped.empty() && stripped.back() == '/';
+     if (!endsWithSlash) {
+         return stripped;
+     }
+     stripped = stripped.substr(0, stripped.length() - 1);
+     return stripped;
+ }
+
+ // Returns a copy of `url` in which the characters starting at offset `shift`
+ // and ending before the first '/' (or NUL) are converted with tolower().
+ // A `shift` that lies beyond the end of `url` leaves the copy unchanged.
+ TString MakeLowerHost(const TStringBuf &url, size_t shift) {
+     TString urlFixed(url);
+     // Guard against an out-of-range start: begin() + shift would otherwise point
+     // outside the buffer and the loop below would read and write foreign memory.
+     if (shift > urlFixed.length()) {
+         return urlFixed;
+     }
+     for (char *c = urlFixed.begin() + shift; *c && (*c != '/'); ++c) {
+         // tolower() requires a value representable as unsigned char (or EOF);
+         // passing a negative char is undefined behaviour.
+         *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
+     }
+
+     return urlFixed;
+ }
+
+ // Produces the normalized form of `url`: the result of CutHttpWwwPrefixes()
+ // with the part before the first '/' lowercased. When that part has no
+ // uppercase characters the trimmed view is simply converted to a string.
+ TString MakeNormalized(const TStringBuf &url) {
+     const TStringBuf trimmed = CutHttpWwwPrefixes(url);
+     if (!HasLowerHost(trimmed)) {
+         return MakeLowerHost(trimmed);
+     }
+     return ToString(trimmed);
+ }
+
} // namespace NUrl
size_t GetHttpPrefixSize(const char* url, bool ignorehttps) noexcept {
diff --git a/library/cpp/string_utils/url/url.h b/library/cpp/string_utils/url/url.h
index 68fa23d64c..9b73233078 100644
--- a/library/cpp/string_utils/url/url.h
+++ b/library/cpp/string_utils/url/url.h
@@ -21,6 +21,11 @@ namespace NUrl {
Y_PURE_FUNCTION
TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
+ bool HasLowerHost(const TStringBuf &url); // true when no character before the first '/' is uppercase
+ TStringBuf CutHttpWwwPrefixes(const TStringBuf &url); // applies CutHttpPrefix and CutWWWPrefix, then drops one trailing '/'
+ TString MakeLowerHost(const TStringBuf &url, size_t shift = 0); // copy of url with characters from `shift` up to the first '/' lowercased
+ TString MakeNormalized(const TStringBuf &url); // CutHttpWwwPrefixes() result with the part before the first '/' lowercased
+
} // namespace NUrl
Y_PURE_FUNCTION
@@ -170,4 +175,3 @@ TStringBuf RemoveFinalSlash(TStringBuf str) noexcept;
TStringBuf CutUrlPrefixes(TStringBuf url) noexcept;
bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept;
-