1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
#pragma once
#include <util/generic/fwd.h>
#include <util/generic/strbuf.h>
namespace NUrl {
//! Pair of views returned by NUrl::SplitUrlToHostAndPath(); the member names
//! match the structured-binding example `auto [host, path] = ...`.
struct TSplitUrlToHostAndPathResult {
//! Host part of the split URL.
TStringBuf host;
//! Path part of the split URL.
TStringBuf path;
};
/**
* Splits URL to host and path
* Example:
* auto [host, path] = SplitUrlToHostAndPath(url);
*
* @param[in] url any URL
* @return {host, path} parsed host and path, as a TSplitUrlToHostAndPathResult
* @note NOTE(review): both results are TStringBuf and `url` is marked
* Y_LIFETIME_BOUND, so they presumably point into the buffer behind `url`
* and must not outlive it - confirm against the implementation.
*/
Y_PURE_FUNCTION
TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url Y_LIFETIME_BOUND);
//! NOTE(review): undocumented. Judging by the name only, reports whether the
//! host part of `url` is already lower-case - confirm against the implementation.
bool HasLowerHost(const TStringBuf url);
//! NOTE(review): undocumented. Judging by the name only, strips leading
//! http(s) scheme and 'www' prefixes; the exact set of prefixes is not visible
//! in this header - confirm. `url` is marked Y_LIFETIME_BOUND, so the result
//! presumably refers to the same buffer as `url`.
TStringBuf CutHttpWwwPrefixes(const TStringBuf url Y_LIFETIME_BOUND);
//! NOTE(review): undocumented. Returns a TString by value; judging by the name
//! only, a copy of `url` with the host lower-cased. The meaning of `shift`
//! (default 0) is not visible in this header - TODO document.
TString MakeLowerHost(const TStringBuf url, size_t shift = 0);
//! NOTE(review): undocumented. Returns a TString by value; which normalization
//! steps are applied is not visible in this header - TODO document.
TString MakeNormalized(const TStringBuf url);
} // namespace NUrl
//! Overload set measuring the HTTP scheme prefix of a URL. Overloads exist for
//! char-based input (const char*, TStringBuf) and wchar16-based input
//! (const wchar16*, TWtringBuf); all are declared noexcept and Y_PURE_FUNCTION.
//! NOTE(review): the exact contract is not visible in this header. Presumably
//! the result is the length of a leading 'http://' or 'https://' and 0 when
//! there is none, and `ignorehttps` has the same meaning as documented for
//! CutHttpPrefix() (https:// is not treated as a prefix) - confirm.
//! NOTE(review): the pointer overloads take no length, so they presumably
//! expect a null-terminated string - confirm.
Y_PURE_FUNCTION
size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept;
Y_PURE_FUNCTION
size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept;
Y_PURE_FUNCTION
size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept;
Y_PURE_FUNCTION
size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept;
// BEWARE of TStringBuf: unlike TString, it cannot be used with operator ~ or
// c_str() (presumably because a TStringBuf is not guaranteed to be
// null-terminated).

//! NOTE(review): undocumented. Judging by the name only, returns the length of
//! the scheme prefix of `url` (any scheme, not just http/https); whether the
//! '://' separator is counted and what is returned for a URL without a scheme
//! are not visible in this header - confirm.
Y_PURE_FUNCTION
size_t GetSchemePrefixSize(const TStringBuf url) noexcept;
//! NOTE(review): undocumented. Judging by the name only, returns the scheme
//! prefix of `url`. `url` is marked Y_LIFETIME_BOUND, so the result presumably
//! refers to the same buffer as `url` and must not outlive it.
Y_PURE_FUNCTION
TStringBuf GetSchemePrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! Removes the protocol prefix 'http://' or 'https://' from the given URL.
//! @note if the URL has no prefix, or has some other prefix, the function does nothing
//! @param url URL from which the prefix should be removed
//! @param ignorehttps if true, leaves https://
//! @return the URL without the protocol prefix. The result is a TStringBuf and
//!         `url` is marked Y_LIFETIME_BOUND, so it is presumably a view into
//!         the same buffer as `url` rather than a new string.
Y_PURE_FUNCTION
TStringBuf CutHttpPrefix(const TStringBuf url Y_LIFETIME_BOUND, bool ignorehttps = false) noexcept;
//! Overload of CutHttpPrefix() for TWtringBuf input with the same parameters.
//! NOTE(review): presumably the same contract as the TStringBuf overload - confirm.
Y_PURE_FUNCTION
TWtringBuf CutHttpPrefix(const TWtringBuf url Y_LIFETIME_BOUND, bool ignorehttps = false) noexcept;
//! NOTE(review): undocumented. Judging by the name only, removes the scheme
//! prefix (any scheme, not just http/https) from `url` and leaves a URL
//! without a scheme unchanged - confirm against the implementation.
Y_PURE_FUNCTION
TStringBuf CutSchemePrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! Adds the specified scheme prefix if the URL has no scheme.
//! @note if the URL already has a scheme prefix, the function returns the URL unchanged
//! @param url URL to which the scheme should be added
//! @param scheme scheme to add. NOTE(review): the one-argument overload is
//!        described as passing "http" (without '://'), so the separator is
//!        presumably added by the function - confirm.
//! @return the resulting URL as a TString (returned by value)
TString AddSchemePrefix(const TString& url, const TStringBuf scheme);
//! Same as `AddSchemePrefix(url, "http")`.
TString AddSchemePrefix(const TString& url);
//! Host-extraction helpers. All four return a TStringBuf and mark `url` as
//! Y_LIFETIME_BOUND, so the result presumably refers to the same buffer as
//! `url` and must not outlive it.
//! NOTE(review): none of these is documented in this header; the descriptions
//! below are inferred from names and parameters only - confirm against the
//! implementation.

//! NOTE(review): presumably returns the host part of `url`; whether a scheme
//! or port can remain in the result is not visible here.
Y_PURE_FUNCTION
TStringBuf GetHost(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): presumably returns the host together with the port, if any.
Y_PURE_FUNCTION
TStringBuf GetHostAndPort(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): presumably returns scheme plus host.
//! @param trimHttp defaults to true; presumably drops a leading 'http://' from
//!        the result - TODO confirm, including how 'https://' is treated.
Y_PURE_FUNCTION
TStringBuf GetSchemeHost(const TStringBuf url Y_LIFETIME_BOUND, bool trimHttp = true) noexcept;
//! NOTE(review): presumably returns scheme, host and port.
//! @param trimHttp defaults to true; presumably as in GetSchemeHost() - TODO confirm.
//! @param trimDefaultPort defaults to true; presumably drops a port equal to
//!        the scheme's default one - TODO confirm.
Y_PURE_FUNCTION
TStringBuf GetSchemeHostAndPort(const TStringBuf url Y_LIFETIME_BOUND, bool trimHttp = true, bool trimDefaultPort = true) noexcept;
/**
* Splits URL to host and path, returning both through output parameters.
*
* @param[in] url any URL
* @param[out] host parsed host
* @param[out] path parsed path
*/
void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path);
//! Overload of SplitUrlToHostAndPath() whose outputs are TString instead of
//! TStringBuf.
//! NOTE(review): presumably performs the same split and stores copies of the
//! parts in the owned strings - confirm against the implementation.
void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path);
/**
* Separates a URL into the URL prefix, the query (a.k.a. the list of CGI
* parameters), and the fragment (a.k.a. the part after '#').
*
* @param[in] url any URL
* @param[out] sanitizedUrl parsed URL without query and fragment parts
* @param[out] query parsed query
* @param[out] fragment parsed fragment
* @note NOTE(review): all outputs are TStringBuf, so they presumably refer to
* the buffer behind `url` and must not outlive it. Whether the '?' and '#'
* delimiters are included in the outputs is not visible in this header -
* confirm against the implementation.
*/
void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment);
/**
* Extracts scheme, host and port from URL.
*
* The port is parsed from the URL with a check against ui16 overflow. If the
* URL doesn't contain a port, it is determined from the scheme when the scheme
* is one of the known ones (currently https:// and http:// only).
* Output parameters are left unmodified if the URL has no corresponding
* component.
*
* @param[in] url any URL
* @param[out] scheme URL scheme
* @param[out] host host name
* @param[out] port parsed port number
* @return false if a port is present but cannot be parsed into ui16,
* true otherwise.
*/
bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
/**
* Extracts scheme, host and port from URL.
*
* This function performs the same actions as TryGetSchemeHostAndPort(), but
* throws yexception when the port number cannot be parsed instead of
* returning false.
*
* @param[in] url any URL
* @param[out] scheme URL scheme
* @param[out] host host name
* @param[out] port parsed port number
* @throws yexception if a port is present but cannot be parsed into ui16.
*/
void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
//! NOTE(review): undocumented. Judging by the name only, returns the path and
//! query part of `url` - confirm against the implementation, including the
//! result for a URL that has no path.
//! @param trimFragment defaults to true; presumably removes the '#fragment'
//!        part from the result - TODO confirm.
//! @return a TStringBuf; `url` is marked Y_LIFETIME_BOUND, so the result
//!         presumably refers to the same buffer as `url`.
Y_PURE_FUNCTION
TStringBuf GetPathAndQuery(const TStringBuf url Y_LIFETIME_BOUND, bool trimFragment = true) noexcept;
/**
* Extracts the host from a URL, cutting the http(https) protocol prefix and
* the port, if any.
* @param[in] url any URL
* @return host without port and http(https) prefix.
*/
Y_PURE_FUNCTION
TStringBuf GetOnlyHost(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! Returns a parent domain of `host`; see the example at the end of the
//! declaration line.
//! @param host host name
//! @param level NOTE(review): judging by the example, the number of trailing
//!        dot-separated labels to keep - confirm, including what happens when
//!        `host` has fewer labels than `level`.
Y_PURE_FUNCTION
TStringBuf GetParentDomain(const TStringBuf host Y_LIFETIME_BOUND, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru"
//! NOTE(review): undocumented. Judging by the name only, returns the zone
//! (top-level part) of `host` - confirm against the implementation.
Y_PURE_FUNCTION
TStringBuf GetZone(const TStringBuf host Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): undocumented. Judging by the name only, removes a leading
//! 'www.' from `url`; case handling and interaction with a scheme prefix are
//! not visible in this header - confirm.
Y_PURE_FUNCTION
TStringBuf CutWWWPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): undocumented. Judging by the name only, like CutWWWPrefix()
//! but also handling numbered prefixes (something like 'www2.') - the exact
//! accepted form is not visible in this header - confirm.
Y_PURE_FUNCTION
TStringBuf CutWWWNumberedPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
/**
* Cuts the 'm.' (or 'M.') prefix from a URL if and only if the URL starts
* with it.
* Example: 'm.some-domain.com' -> 'some-domain.com'.
* 'http://m.some-domain.com' is not changed, because it starts with the
* scheme rather than with 'm.'.
*
* @param[in] url any URL
* @return url without 'm.' or 'M.' prefix.
*/
Y_PURE_FUNCTION
TStringBuf CutMPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! Marked by the original author as "should not be used" (see the trailing
//! comment); the reason and the exact result are not visible in this header.
Y_PURE_FUNCTION
TStringBuf GetDomain(const TStringBuf host Y_LIFETIME_BOUND) noexcept; // should not be used
//! NOTE(review): undocumented. Judging by the signature only, writes a
//! normalized form of `source` into the caller-provided buffer `dest` of
//! capacity `dest_size`. The meaning of the returned size_t, the behaviour
//! when the buffer is too small, and whether the output is null-terminated are
//! not visible in this header - TODO document.
size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size);
//! NOTE(review): undocumented. Same buffer-style interface as
//! NormalizeUrlName(), applied to a host name.
//! @param defport defaults to 80; presumably the port regarded as the default
//!        one during normalization - TODO confirm.
size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80);
//! NOTE(review): undocumented. Judging by the name only, returns `str` without
//! a trailing '/'; whether more than one slash is removed is not visible in
//! this header - confirm.
Y_PURE_FUNCTION
TStringBuf RemoveFinalSlash(TStringBuf str Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): undocumented. Judging by the name only, removes known
//! prefixes (presumably scheme and/or 'www.') from `url`; the exact set is not
//! visible in this header - confirm.
TStringBuf CutUrlPrefixes(TStringBuf url Y_LIFETIME_BOUND) noexcept;
//! NOTE(review): undocumented. Judging by the name only, reports whether the
//! path part of `url` starts with `token`; how a token boundary is determined
//! is not visible in this header - confirm.
bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf token) noexcept;
|