aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/uri
diff options
context:
space:
mode:
authortrifon <trifon@yandex-team.ru>2022-02-10 16:50:51 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:51 +0300
commite3135d62bbcf321d86fff8258f5cdc5b2f57bde5 (patch)
treea5eeb758718dafefc9e215dae39f45cb61309f34 /library/cpp/uri
parent252a6c9fbded23dfee8729dc34c97159962216a7 (diff)
downloadydb-e3135d62bbcf321d86fff8258f5cdc5b2f57bde5.tar.gz
Restoring authorship annotation for <trifon@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/uri')
-rw-r--r--library/cpp/uri/assign.cpp38
-rw-r--r--library/cpp/uri/common.h46
-rw-r--r--library/cpp/uri/encode.cpp20
-rw-r--r--library/cpp/uri/encode.h34
-rw-r--r--library/cpp/uri/encodefsm.rl630
-rw-r--r--library/cpp/uri/http_url.h14
-rw-r--r--library/cpp/uri/other.cpp4
-rw-r--r--library/cpp/uri/other.h4
-rw-r--r--library/cpp/uri/parse.cpp32
-rw-r--r--library/cpp/uri/parse.h26
-rw-r--r--library/cpp/uri/parsefsm.rl6164
-rw-r--r--library/cpp/uri/uri-ru_ut.cpp8
-rw-r--r--library/cpp/uri/uri.cpp132
-rw-r--r--library/cpp/uri/uri.h64
14 files changed, 308 insertions, 308 deletions
diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp
index ae9125c727..c84d015cb5 100644
--- a/library/cpp/uri/assign.cpp
+++ b/library/cpp/uri/assign.cpp
@@ -1,6 +1,6 @@
#include "uri.h"
#include "parse.h"
-
+
#include <contrib/libs/libidn/idna.h>
#include <library/cpp/charset/recyr.hh>
@@ -9,7 +9,7 @@
#include <util/string/cast.h>
#include <util/system/yassert.h>
#include <util/system/sys_alloc.h>
-
+
namespace NUri {
TMallocPtr<char> TUri::IDNToAscii(const wchar32* idna) {
// XXX: don't use punycode_encode directly as it doesn't include
@@ -138,18 +138,18 @@ namespace NUri {
const TSection& scheme = parser.Get(FieldScheme);
const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme);
-
+
// set the scheme always if available
if (schemeInfo.Str.empty() && scheme.IsSet())
FldSet(FieldScheme, scheme.Get());
-
+
if (ParsedOK != ret)
return ret;
-
+
size_t buflen = 0;
-
+
// special processing for fields
-
+
const bool convertIDN = parser.Flags & FeatureConvertHostIDN;
long flags = parser.Flags.Allow;
if (convertIDN)
@@ -196,7 +196,7 @@ namespace NUri {
// https://developers.google.com/webmasters/ajax-crawling/docs/specification
static const TStringBuf escFragPrefix(TStringBuf("_escaped_fragment_="));
-
+
bool encHashBangFrag = false;
TStringBuf qryBeforeEscapedFragment;
TStringBuf qryEscapedFragment;
@@ -250,7 +250,7 @@ namespace NUri {
if (FieldFrag == fld && qryEscapedFragment.IsInited())
continue;
-
+
char* beg = out.Buf();
TStringBuf val = section.Get();
long careFlags = section.GetFlagsEncode();
@@ -299,7 +299,7 @@ namespace NUri {
Y_ASSERT(beg >= out.Beg());
out.SetPos(end);
- }
+ }
FldSetNoDirty(fld, TStringBuf(beg, end));
@@ -312,8 +312,8 @@ namespace NUri {
}
out << '\0';
- }
-
+ }
+
if (hostConverted) {
char* beg = out.Buf();
out << hostascii;
@@ -322,7 +322,7 @@ namespace NUri {
FldSetNoDirty(fld, TStringBuf(beg, end));
out << '\0';
}
-
+
Buffer.Resize(out.Len());
if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) {
@@ -330,7 +330,7 @@ namespace NUri {
ret = ParsedBadScheme;
else
SetSchemeImpl(defscheme);
- }
+ }
if (0 == (parser.Flags & FeatureAllowEmptyPath))
CheckMissingFields();
@@ -340,7 +340,7 @@ namespace NUri {
if (!TryFromString<ui16>(port, Port))
ret = ParsedBadPort;
}
-
+
if (ParsedOK != ret)
return ret;
@@ -370,7 +370,7 @@ namespace NUri {
if (url.empty())
return ParsedEmpty;
-
+
if (maxlen > 0 && url.length() > maxlen)
return ParsedTooLong;
@@ -392,7 +392,7 @@ namespace NUri {
return ret;
Merge(base, PathOperationFlag(flags));
}
-
+
Rewrite();
return ret;
}
@@ -408,7 +408,7 @@ namespace NUri {
Rewrite();
return ret;
}
-
+
TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) {
const TState::EParsed ret = ParseImpl(
url, flags | FeatureNoRelPath, maxlen, defscheme, enc);
@@ -417,7 +417,7 @@ namespace NUri {
if (IsNull(FlagHost))
return ParsedBadHost;
-
+
Rewrite();
return ParsedOK;
}
diff --git a/library/cpp/uri/common.h b/library/cpp/uri/common.h
index 8025357763..54d5492bcc 100644
--- a/library/cpp/uri/common.h
+++ b/library/cpp/uri/common.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include <util/stream/output.h>
#include <util/system/compat.h>
#include <util/generic/strbuf.h>
@@ -10,7 +10,7 @@ namespace NUri {
class TEncodeMapperBase;
struct TCharFlags;
}
-
+
namespace NParse {
class TRange;
}
@@ -60,7 +60,7 @@ namespace NUri {
FIELD_FLAG(UrlMAX),
FIELD_FLAG(HostAscii),
FIELD_FLAG(AllMAX),
-
+
FlagHostPort = FlagHost | FlagPort,
FlagAuth = FlagUser | FlagPass,
FlagFragment = FlagFrag,
@@ -92,8 +92,8 @@ namespace NUri {
// add before this line
ParsedMAX
};
- };
-
+ };
+
struct TScheme {
// don't forget to define a SchemeRegistry entry
enum EKind {
@@ -110,19 +110,19 @@ namespace NUri {
,
SchemeUnknown
};
- };
-
+ };
+
class TFeature {
friend class NEncode::TEncoder;
friend class NEncode::TEncodeMapperBase;
friend struct NEncode::TCharFlags;
friend class TParser;
friend class NParse::TRange;
-
+
#define FEATURE_NAME(f) _BitFeature##f
#define FEATURE_FLAG_NAME(f) Feature##f
#define FEATURE_FLAG(f) FEATURE_FLAG_NAME(f) = 1UL << FEATURE_NAME(f)
-
+
protected:
enum EBit {
//==============================
@@ -134,10 +134,10 @@ namespace NUri {
// allows all known schemes in URL
FEATURE_NAME(SchemeKnown),
-
+
// allows all schemes, not only known
FEATURE_NAME(SchemeFlexible),
-
+
// allow opaque (RFC 2396) or rootless (RFC 3986) urls
FEATURE_NAME(AllowRootless),
@@ -156,7 +156,7 @@ namespace NUri {
// in scheme and host segments:
// change upper case letters onto lower case ones
FEATURE_NAME(ToLower),
-
+
// decode unreserved symbols
FEATURE_NAME(DecodeUnreserved),
@@ -209,7 +209,7 @@ namespace NUri {
// tries to fix errors (in particular, in fragment)
FEATURE_NAME(TryToFix),
-
+
// check host for DNS compliance
FEATURE_NAME(CheckHost),
@@ -228,7 +228,7 @@ namespace NUri {
// robot interpreted network paths as BadFormat urls
FEATURE_NAME(DenyNetworkPath),
-
+
// robot interprets URLs without a host as BadFormat
FEATURE_NAME(RemoteOnly),
@@ -315,15 +315,15 @@ namespace NUri {
FeaturesEncodeExtended = 0 | FeatureEncodeExtendedASCII | FeatureEncodeExtendedDelim,
FeaturesEncode = 0 | FeatureEncodeForSQL | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent | FeaturesEncodeExtended,
-
+
// these are not guaranteed to apply to a given field
FeatureDecodeAllowed = 0 | FeatureDecodeUnreserved | FeatureDecodeFieldAllowed,
-
+
FeaturesMaybeDecode = 0 | FeaturesDecode | FeatureDecodeAllowed,
-
+
FeaturesMaybeEncode = 0 | FeaturesEncode,
-
+
FeaturesEncodeDecode = 0 | FeaturesMaybeEncode | FeaturesMaybeDecode,
FeaturesAllEncoder = 0 | FeaturesEncodeDecode | FeatureDecodeANY | FeatureToLower | FeatureUpperEncoded | FeatureEncodeSpaceAsPlus,
@@ -333,12 +333,12 @@ namespace NUri {
FeaturesDefault = 0 // it reproduces old parsedURL
| FeaturePathOperation | FeaturePathDenyRootParent | FeatureCheckHost,
-
+
// essentially allows all valid RFC urls and keeps them as-is
FeaturesBare = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureAllowEmptyPath,
FeaturesAll = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureCheckHost | FeaturesNormalizeSet,
-
+
// Deprecated, use FeaturesRecommended
FeaturesRobotOld = 0
// http://tools.ietf.org/html/rfc3986#section-6.2.2
@@ -367,7 +367,7 @@ namespace NUri {
static inline int strnicmp(const char* lt, const char* rt, size_t len) {
return lt == rt ? 0 : ::strnicmp(lt, rt, len);
}
-
+
static inline int CompareNoCasePrefix(const TStringBuf& lt, const TStringBuf& rt) {
return strnicmp(lt.data(), rt.data(), rt.length());
}
@@ -381,7 +381,7 @@ namespace NUri {
return CompareNoCasePrefix(lt, rt);
return lt.length() < rt.length() ? -1 : 1;
}
-
+
class TSchemeInfo {
public:
const TScheme::EKind Kind;
@@ -413,7 +413,7 @@ namespace NUri {
static ui16 GetDefaultPort(TScheme::EKind scheme) {
return Get(scheme).Port;
}
-
+
private:
static const TSchemeInfo Registry[];
};
diff --git a/library/cpp/uri/encode.cpp b/library/cpp/uri/encode.cpp
index 584fb1bac9..34ca130ab8 100644
--- a/library/cpp/uri/encode.cpp
+++ b/library/cpp/uri/encode.cpp
@@ -1,7 +1,7 @@
#include "encode.h"
-
+
#include <util/generic/singleton.h>
-
+
namespace NUri {
namespace NEncode {
// http://tools.ietf.org/html/rfc3986#section-2.2
@@ -9,9 +9,9 @@ namespace NUri {
#define SUBDELIMS0 "!$&'()*+,;="
// http://tools.ietf.org/html/rfc3986#section-2.3
#define UNRESERVED "-._~"
-
+
// now find subsets which can sometimes be decoded
-
+
// remove '#' which can't ever be decoded
// don't mark anything allowed for pass (pass is completely encoded)
// safe in path, qry, frag
@@ -73,7 +73,7 @@ namespace NUri {
GetMutable('&').EncodeFld |= TField::FlagQuery;
GetMutable('+').EncodeFld |= TField::FlagQuery;
}
-
+
// should we decode an encoded character
bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const {
const ui64 myflags = flags & FeatFlags;
@@ -162,7 +162,7 @@ namespace NUri {
Encode(out, val[i]);
return out;
}
-
+
IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) {
for (size_t i = 0; i != val.length(); ++i) {
const char c = val[i];
@@ -186,7 +186,7 @@ namespace NUri {
}
return out;
}
-
+
IOutputStream& TEncoder::EncodeField(
IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) {
const ui32 fldmask = ui32(1) << fld;
@@ -199,10 +199,10 @@ namespace NUri {
}
return out;
}
-
+
void TEncoder::Do(unsigned char ch, int res) {
OutFlags |= GetFlags(ch).FeatFlags;
-
+
bool escapepct = false;
if (0 < res) // definitely encode
escapepct = FldDst.Enabled();
@@ -217,4 +217,4 @@ namespace NUri {
Hex(Out, ch);
}
}
-}
+}
diff --git a/library/cpp/uri/encode.h b/library/cpp/uri/encode.h
index a9ece15427..2c70b902b7 100644
--- a/library/cpp/uri/encode.h
+++ b/library/cpp/uri/encode.h
@@ -3,12 +3,12 @@
#include "common.h"
#include <util/stream/output.h>
-
+
namespace NUri {
namespace NEncode {
#define CHAR_TYPE_NAME(f) _ECT##f
#define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f)
-
+
enum ECharType {
CHAR_TYPE_NAME(Digit),
CHAR_TYPE_NAME(Lower),
@@ -29,10 +29,10 @@ namespace NUri {
ECGUnres = ECGAlnum | ECFUnres,
ECGStdrd = ECGUnres | ECFStdrd,
};
-
+
#undef CHAR_TYPE_NAME
#undef CHAR_TYPE_FLAG
-
+
struct TCharFlags {
ui32 TypeFlags;
ui64 FeatFlags;
@@ -135,7 +135,7 @@ namespace NUri {
static bool IsType(unsigned char c, ui64 flags) {
return GetFlags(c).TypeFlags & flags;
}
-
+
public:
static bool IsDigit(unsigned char c) {
return IsType(c, ECFDigit);
@@ -158,7 +158,7 @@ namespace NUri {
static const TCharFlags& GetFlags(unsigned char c) {
return Grammar().Get(c);
}
-
+
public:
// process an encoded string, decoding safe chars and encoding unsafe
static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
@@ -169,13 +169,13 @@ namespace NUri {
TEncoder(out, srcfld, dstfld).ReEncode(val);
return out;
}
-
+
// see also UrlUnescape() from string/quote.h
static IOutputStream& Decode(
IOutputStream& out, const TStringBuf& val, ui64 flags) {
return ReEncode(out, val, flags | TFeature::FeatureDecodeANY);
}
-
+
public:
// process a raw string or char, encode as needed
static IOutputStream& Hex(IOutputStream& out, unsigned char val);
@@ -185,18 +185,18 @@ namespace NUri {
}
static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);
-
+
static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);
static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
return EncodeField(out, val, TField::FieldAllMAX);
}
-
+
static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
return EncodeField(out, val, TField::FieldAllMAX, flags);
}
-
+
public:
class TGrammar {
TCharFlags Map_[256];
@@ -239,12 +239,12 @@ namespace NUri {
const TEncodeToMapper FldDst;
ui64 OutFlags;
int HexValue;
-
+
protected:
void HexReset() {
HexValue = 0;
}
-
+
void HexDigit(char c) {
HexAdd(c - '0');
}
@@ -254,7 +254,7 @@ namespace NUri {
void HexLower(char c) {
HexAdd(c - 'a' + 10);
}
-
+
void HexAdd(int val) {
HexValue <<= 4;
HexValue += val;
@@ -275,8 +275,8 @@ namespace NUri {
}
void Do(unsigned char, int);
};
- }
+ }
using TEncoder = NEncode::TEncoder;
-
-}
+
+}
diff --git a/library/cpp/uri/encodefsm.rl6 b/library/cpp/uri/encodefsm.rl6
index 6a323aa85a..b1cca61daf 100644
--- a/library/cpp/uri/encodefsm.rl6
+++ b/library/cpp/uri/encodefsm.rl6
@@ -1,5 +1,5 @@
#include <library/cpp/uri/encode.h>
-
+
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-variable"
#endif
@@ -7,45 +7,45 @@
namespace NUri {
namespace NEncode {
-%%{
+%%{
machine TEncoder;
-
+
hex = (
digit >{ HexDigit(fc); } |
[A-F] >{ HexUpper(fc); } |
[a-f] >{ HexLower(fc); }
);
-
+
escaped = ( "%" hex hex )
> { HexReset(); }
% { DoHex(); };
-
+
bad_escaped = ( "%" hex )
% {
DoSym(*(fpc - 2));
DoSym(*(fpc - 1));
};
-
+
sym = (any - bad_escaped - escaped) %{ DoSym(*(fpc - 1)); };
-
+
main := ( escaped | bad_escaped | sym )**;
write data;
-}%%
-
+}%%
+
ui64 TEncoder::ReEncode(const TStringBuf &url)
-{
+{
const char *p = url.data();
const char *pe = p + url.length();
const char *eof = pe;
- int cs;
+ int cs;
OutFlags = 0;
-
- %% write init;
- %% write exec;
+
+ %% write init;
+ %% write exec;
return OutFlags;
-}
+}
}
}
diff --git a/library/cpp/uri/http_url.h b/library/cpp/uri/http_url.h
index 7c8e8d844d..11ebfc939b 100644
--- a/library/cpp/uri/http_url.h
+++ b/library/cpp/uri/http_url.h
@@ -1,26 +1,26 @@
#pragma once
-
+
#include "uri.h"
#include "other.h"
// XXX: use NUri::TUri directly; this whole file is for backwards compatibility
-class THttpURL
+class THttpURL
: public NUri::TUri {
public:
typedef TField::EFlags TFlags;
typedef TField::EField TField;
typedef TScheme::EKind TSchemeKind;
typedef TState::EParsed TParsedState;
-
+
public:
enum {
FeatureUnescapeStandard = TFeature::FeatureDecodeStandard,
FeatureEscSpace = TFeature::FeatureEncodeSpaceAsPlus,
FeatureEscapeUnescaped = TFeature::FeatureEncodeExtendedASCII,
FeatureNormalPath = TFeature::FeaturePathStripRootParent,
- };
-
+ };
+
public:
THttpURL(unsigned defaultPort = 80)
: TUri(defaultPort)
@@ -31,7 +31,7 @@ public:
: TUri(host, port, path, query, scheme, defaultPort)
{
}
-
+
THttpURL(const TUri& url)
: TUri(url)
{
@@ -67,7 +67,7 @@ public: // use TUri::FldXXX methods for better control
void Reset(EField field) {
FldClr(field);
}
-};
+};
static inline const char* HttpURLParsedStateToString(const NUri::TState::EParsed& t) {
return NUri::ParsedStateToString(t);
diff --git a/library/cpp/uri/other.cpp b/library/cpp/uri/other.cpp
index b23a5b68a9..8c161e906e 100644
--- a/library/cpp/uri/other.cpp
+++ b/library/cpp/uri/other.cpp
@@ -3,8 +3,8 @@
#include <util/string/util.h>
#include <util/system/yassert.h>
-/********************************************************/
-/********************************************************/
+/********************************************************/
+/********************************************************/
static const Tr InvertTr(".:/?#", "\005\004\003\002\001");
static const Tr RevertTr("\005\004\003\002\001", ".:/?#");
diff --git a/library/cpp/uri/other.h b/library/cpp/uri/other.h
index 7aec22e77b..9ce8112b3a 100644
--- a/library/cpp/uri/other.h
+++ b/library/cpp/uri/other.h
@@ -1,7 +1,7 @@
#pragma once
-
+
#include <util/generic/string.h>
-
+
// Some functions for inverted url representation
// No scheme cut-off, no 80th port normalization
diff --git a/library/cpp/uri/parse.cpp b/library/cpp/uri/parse.cpp
index 1db4e008c4..61f64be600 100644
--- a/library/cpp/uri/parse.cpp
+++ b/library/cpp/uri/parse.cpp
@@ -1,24 +1,24 @@
#include "parse.h"
#include "common.h"
#include "encode.h"
-
+
namespace NUri {
const TParseFlags TParser::FieldFlags[] =
{
TParseFlags(0 // FieldScheme
| TFeature::FeatureToLower,
0)
-
+
,
TParseFlags(0 // FieldUsername
| TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent,
0 | TFeature::FeatureToLower)
-
+
,
TParseFlags(0 // FieldPassword
| TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent,
0 | TFeature::FeatureToLower)
-
+
,
TParseFlags(0 // FieldHost
| TFeature::FeatureToLower | TFeature::FeatureUpperEncoded | (TFeature::FeaturesMaybeEncode & ~TFeature::FeatureEncodeExtendedDelim),
@@ -57,7 +57,7 @@ namespace NUri {
if (mask & TFeature::FeaturesDecode)
Decode += range.Decode;
}
-
+
}
void TParser::copyRequirementsImpl(const char* ptr) {
@@ -89,8 +89,8 @@ namespace NUri {
section.AddRange(CurRange, GetFieldFlags(fld));
}
CurRange.Reset();
- }
-
+ }
+
void TParser::PctEndImpl(const char* ptr) {
#ifdef DO_PRN
PrintHead(PctBegin, __FUNCTION__);
@@ -111,10 +111,10 @@ namespace NUri {
PctBegin = nullptr;
const unsigned char ch = HexValue;
ui64 flags = TEncoder::GetFlags('%').FeatFlags | TEncoder::GetFlags(ch).FeatFlags;
-
+
setRequirementExcept(ptr, flags, TFeature::FeaturesMaybeEncode);
}
-
+
TState::EParsed TParser::ParseImpl() {
#ifdef DO_PRN
PrintHead(UriStr.data(), "[Parsing]") << "URL";
@@ -142,7 +142,7 @@ namespace NUri {
if ((Flags & TFeature::FeatureDenyNetworkPath) && IsNetPath())
return TState::ParsedBadFormat;
-
+
const TSection& scheme = Sections[TField::FieldScheme];
Scheme = scheme.IsSet() ? TSchemeInfo::GetKind(scheme.Get()) : TScheme::SchemeEmpty;
const TSchemeInfo& schemeInfo = TSchemeInfo::Get(Scheme);
@@ -157,7 +157,7 @@ namespace NUri {
if (Flags & TFeature::FeatureAllowRootless)
return TState::ParsedOK;
-
+
if (!(Flags & TFeature::FeatureSchemeFlexible))
return TState::ParsedBadScheme;
@@ -166,16 +166,16 @@ namespace NUri {
checkSectionCollision(TField::FieldUser, TField::FieldHost);
checkSectionCollision(TField::FieldPass, TField::FieldPort);
-
+
if (0 == (Flags & TFeature::FeatureAuthSupported))
if (Sections[TField::FieldUser].IsSet() || Sections[TField::FieldPass].IsSet())
return TState::ParsedBadAuth;
-
+
TSection& host = Sections[TField::FieldHost];
if (host.IsSet())
for (; host.End != host.Beg && '.' == host.End[-1];)
--host.End;
-
+
if (scheme.IsSet()) {
ui64 wantCareFlags = 0;
switch (Scheme) {
@@ -193,7 +193,7 @@ namespace NUri {
TFeature::FeatureSchemeFlexible | TFeature::FeatureSchemeKnown;
break;
}
-
+
if (0 != wantCareFlags && 0 == (Flags & wantCareFlags))
return TState::ParsedBadScheme;
if ((schemeInfo.FldReq & TField::FlagHost) || (Flags & TFeature::FeatureRemoteOnly))
@@ -204,4 +204,4 @@ namespace NUri {
return TState::ParsedOK;
}
-}
+}
diff --git a/library/cpp/uri/parse.h b/library/cpp/uri/parse.h
index ca2358e572..544eeeac67 100644
--- a/library/cpp/uri/parse.h
+++ b/library/cpp/uri/parse.h
@@ -1,17 +1,17 @@
#pragma once
-
+
// #define DO_PRN
-
+
#include <cstddef>
#include "common.h"
-
+
#include <library/cpp/charset/doccodes.h>
#include <util/generic/strbuf.h>
#include <util/stream/output.h>
#include <util/string/cast.h>
#include <util/system/yassert.h>
-
+
namespace NUri {
class TParser;
@@ -123,12 +123,12 @@ namespace NUri {
End = pc;
return true;
}
-
+
void Set(const TStringBuf& buf) {
Enter(buf.data());
Leave(buf.data() + buf.length());
}
-
+
public:
bool IsSet() const {
return End;
@@ -191,7 +191,7 @@ namespace NUri {
|| TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath));
State = ParseImpl();
}
-
+
public:
const TSection& Get(TField::EField fld) const {
return Sections[fld];
@@ -253,7 +253,7 @@ namespace NUri {
#endif
Sections[fld].Reset(pc);
}
-
+
void storeSection(const TStringBuf& val, TField::EField fld) {
#ifdef DO_PRN
PrintHead(val.data(), __FUNCTION__, fld);
@@ -270,7 +270,7 @@ namespace NUri {
copyRequirements(pc);
Sections[fld].Enter(pc);
}
-
+
void finishSection(const char* pc, TField::EField fld) {
#ifdef DO_PRN
PrintHead(pc, __FUNCTION__, fld);
@@ -279,7 +279,7 @@ namespace NUri {
if (Sections[fld].Leave(pc))
copyRequirements(pc);
}
-
+
void setRequirement(const char* ptr, ui64 flags) {
#ifdef DO_PRN
PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
@@ -315,7 +315,7 @@ namespace NUri {
if (nullptr != CurRange.Beg && CurRange.Beg != ptr)
copyRequirementsImpl(ptr);
}
-
+
void HexDigit(const char* ptr, char c) {
Y_UNUSED(ptr);
HexAdd(c - '0');
@@ -336,7 +336,7 @@ namespace NUri {
HexValue = 0;
}
void HexSet(const char* ptr);
-
+
void PctEndImpl(const char* ptr);
void PctEnd(const char* ptr) {
if (nullptr != PctBegin && ptr != PctBegin)
@@ -358,4 +358,4 @@ namespace NUri {
TState::EParsed ParseImpl();
};
-}
+}
diff --git a/library/cpp/uri/parsefsm.rl6 b/library/cpp/uri/parsefsm.rl6
index 7097723650..45b1b29f43 100644
--- a/library/cpp/uri/parsefsm.rl6
+++ b/library/cpp/uri/parsefsm.rl6
@@ -1,36 +1,36 @@
#include <library/cpp/uri/parse.h>
-
+
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-variable"
#endif
-%%{
+%%{
machine TParser;
-
+
#================================================
# RFC 3986 http://tools.ietf.org/html/rfc3986
- # with some modifications
- #================================================
- # The RegEx
- #
- # http://www.ics.uci.edu/pub/ietf/uri/#Related
- # ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
- # 12 3 4 5 6 7 8 9
- #results in the following subexpression matches:
- # $1 = http:
- # $2 = http
- # $3 = //www.ics.uci.edu
- # $4 = www.ics.uci.edu
- # $5 = /pub/ietf/uri/
- # $6 = <undefined>
- # $7 = <undefined>
- # $8 = #Related
- # $9 = Related
- #
+ # with some modifications
+ #================================================
+ # The RegEx
+ #
+ # http://www.ics.uci.edu/pub/ietf/uri/#Related
+ # ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ # 12 3 4 5 6 7 8 9
+ #results in the following subexpression matches:
+ # $1 = http:
+ # $2 = http
+ # $3 = //www.ics.uci.edu
+ # $4 = www.ics.uci.edu
+ # $5 = /pub/ietf/uri/
+ # $6 = <undefined>
+ # $7 = <undefined>
+ # $8 = #Related
+ # $9 = Related
+ #
# So $2:scheme $4:authority $5:path $7:query $9:fragment
- #================================================
-
-
+ #================================================
+
+
#================================================
# List of all ASCII characters and where they can be used
#================================================
@@ -79,7 +79,7 @@
#================================================
# Actions used in multiple definitions
#================================================
-
+
action act_req_enc_sql { REQ(fpc, FeatureEncodeForSQL) }
# REQ must apply to a char in range but not after the range has been reset
@@ -94,17 +94,17 @@
action act_end_path { END(fpc, Path) }
- #================================================
+ #================================================
# RFC 3986 ABNFs
#================================================
-
+
DIGIT = digit;
-
+
ALPHA = ( upper >{ REQ(fpc, FeatureToLower) } ) |
lower;
-
+
ALNUM = ALPHA | DIGIT;
-
+
PCT = "%" >{ PctBeg(fpc); } ;
HEXDIG = (
@@ -112,20 +112,20 @@
| [A-F] >{ HexUpper(fpc, fc); }
| [a-f] >{ HexLower(fpc, fc); }
);
-
+
# HexSet sets REQ so must apply in range
HEXNUM = ( HEXDIG HEXDIG ) %{ HexSet(fpc - 1); };
-
+
pct_encoded = PCT HEXNUM;
unreserved = ALNUM | "-" | "." | "_" | "~";
-
+
gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@";
-
+
sub_delims = "!" | "$" | "&" | "(" | ")"
| "*" | "+" | "," | ";" | "="
| ( ['] >act_req_enc_sql );
-
+
#================================================
# Local ABNFs
@@ -135,7 +135,7 @@
# safe character sequences
safe = unreserved | pct_encoded | sub_delims;
-
+
# MOD: Yandex extensions
ext_ascii = (VALID - ascii) >{ REQ(fpc, FeatureEncodeExtendedASCII) };
@@ -144,7 +144,7 @@
) >{ REQ(fpc, FeatureEncodeExtendedDelim) }; # " fix hilite
ext_space = " " >{ REQ(fpc, FeatureEncodeSpace) };
ext_cntrl = cntrl >{ REQ(fpc, FeatureEncodeCntrl) };
-
+
pct_maybe_encoded = PCT (HEXDIG | HEXNUM)? ;
ext_safe = unreserved
| pct_maybe_encoded
@@ -157,49 +157,49 @@
# pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
# uric (RFC 2396)
# MOD: extension to format, add extended delimiters and 8-bit ascii
-
+
pchar_nc = ext_safe | "@";
pchar = pchar_nc | ":";
path_sep = "/";
uric = pchar | path_sep | "?";
-
-
+
+
#================================================
# Fields
#================================================
# Single fields use fXXX as machine definitions
-
-
+
+
#================================================
# Scheme
# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#================================================
-
+
scheme = ( ALPHA ( ALPHA | DIGIT | "+" | "-" | "." )** );
fscheme = scheme >{ BEG(fpc, Scheme) } %{ END(fpc, Scheme) };
-
-
+
+
#================================================
# UserInfo
# userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
#================================================
-
+
# MOD: split into a pair of sections: username and password
-
+
fuser = ( ext_safe )** >{ BEG(fpc, User) } %{ END(fpc, User) };
fpass = ( ext_safe | ":" )** >{ BEG(fpc, Pass) } %{ END(fpc, Pass) };
userinfo = ( fuser ( ":" fpass )? ) ( "@" %act_clr_host @^act_clr_user );
-
-
+
+
#================================================
# Hostname
# host = IP-literal / IPv4address / reg-name
#================================================
-
+
# MOD: simplify IP-literal for now
IPv6address = (HEXDIG | ":" | ".")+;
IP_literal = "[" IPv6address "]";
-
+
# IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
# MOD: simplify dec-octet which originally matches only 0-255
@@ -218,18 +218,18 @@
host = IP_literal | IPv4address | (reg_name - IPv4address);
fhost = host? >act_beg_host %act_end_host;
fhost_nempty = host >act_beg_host %act_end_host;
-
-
+
+
#================================================
# Port
# port = *DIGIT
#================================================
-
+
# MOD: use fport? for empty
fport = DIGIT+ >{ BEG(fpc, Port) } %{ END(fpc, Port) };
-
-
- #================================================
+
+
+ #================================================
# Authority
# authority = [ userinfo "@" ] host [ ":" port ]
#================================================
@@ -247,8 +247,8 @@
# / path-empty ; zero characters
#================================================
- # checkPath rules
-
+ # checkPath rules
+
checkPathHead =
"." ( "."? path_sep VALID* )? %act_req_pathop ;
@@ -265,17 +265,17 @@
# segment = *pchar
segment = pchar**;
-
+
# segment-nz = 1*pchar
segment_nz = pchar+;
-
+
# segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
segment_nz_nc = pchar_nc+;
-
+
sep_segment = path_sep segment;
-
+
# non-standard definitions
-
+
fpath_abnempty =
(
( sep_segment+ )
@@ -291,7 +291,7 @@
)
>act_beg_path %act_req_pathop %act_end_path
;
-
+
# standard definitions
# do not save empty paths, they behave differently in relative resolutions
@@ -321,24 +321,24 @@
)
>act_beg_path %act_end_path
;
-
+
#================================================
# Query and fragment
# query = *( pchar / "/" / "?" )
# fragment = *( pchar / "/" / "?" )
#================================================
-
+
# MOD: fragment allows '#' characters
-
+
fquery = (uric )** >{ BEG(fpc, Query) } %{ END(fpc, Query) };
ffrag = (uric | "#")** >{ BEG(fpc, Frag) } %{ END(fpc, Frag) };
query_frag = ("?" fquery)? ("#" ffrag)? ;
-
-
- #================================================
+
+
+ #================================================
# final ABNFs
# URI-reference = URI / relative-ref
- #================================================
+ #================================================
# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
# hier-part = "//" authority path-abempty
# / path-absolute
@@ -349,9 +349,9 @@
# / path-absolute
# / path-noscheme
# / path-empty
-
+
net_path = "//" authority fpath_abempty;
-
+
URI =
fscheme ":"
(
@@ -387,7 +387,7 @@
$^act_clr_scheme
query_frag
;
-
+
host_path =
(
fhost_nempty fpath_abempty
@@ -459,18 +459,18 @@
;
write data;
-
-}%%
-
+
+}%%
+
namespace NUri {
bool TParser::doParse(const char* str_beg, size_t length)
-{
+{
const char* p = str_beg;
const char* pe = str_beg + length;
const char* eof = pe;
int cs;
-
+
#define BEG(ptr, fld) startSection (ptr, TField::Field ## fld);
#define END(ptr, fld) finishSection(ptr, TField::Field ## fld);
#define SET(val, fld) storeSection(val, TField::Field ## fld);
@@ -488,7 +488,7 @@ bool TParser::doParse(const char* str_beg, size_t length)
}
%% write exec;
-
+
#undef BEG
#undef END
#undef SET
@@ -496,6 +496,6 @@ bool TParser::doParse(const char* str_beg, size_t length)
#undef REQ
return cs >= TParser_first_final;
-}
+}
}
diff --git a/library/cpp/uri/uri-ru_ut.cpp b/library/cpp/uri/uri-ru_ut.cpp
index ec35a164d2..2707a4c22f 100644
--- a/library/cpp/uri/uri-ru_ut.cpp
+++ b/library/cpp/uri/uri-ru_ut.cpp
@@ -45,23 +45,23 @@ namespace NUri {
TState::ParsedOK);
UNIT_ASSERT_VALUES_EQUAL(url.PrintS(),
AsWin1251("www.TEST.Ru/%D4%C5%D3%D4\\'\".html?%D4%C5%D3%D4\\'\"=%D4%C5%D3%D4+\\'\"%10"));
-
+
UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"),
TFeature::FeaturesDefault | TFeature::FeatureEncodeForSQL),
TState::ParsedOK);
UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ%5C%27%22.html?ФЕУФ%5C%27%22=ФЕУФ+%5C%27%22%10"));
-
+
UNIT_ASSERT_VALUES_EQUAL(url.Parse("q/%33%26%13%2f%2b%30%20",
TFeature::FeaturesDefault | TFeature::FeatureDecodeStandard),
TState::ParsedOK);
UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "q/3%26%13/%2B0%20");
-
+
UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.prime-tass.ru/news/0/{656F5BAE-6677-4762-9BED-9E3B77E72055}.uif"),
TState::ParsedOK);
UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path"), TState::ParsedOK);
UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path", TFeature::FeaturesRobot), TState::ParsedOK);
}
-
+
const TString links[] = {
"viewforum.php?f=1&amp;sid=b4568481b67b1d7683bea78634b2e240", "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240",
"./viewtopic.php?p=74&amp;sid=6#p74", "./viewtopic.php?p=74&sid=6#p74",
diff --git a/library/cpp/uri/uri.cpp b/library/cpp/uri/uri.cpp
index 56a9a4e5ef..7f5540471a 100644
--- a/library/cpp/uri/uri.cpp
+++ b/library/cpp/uri/uri.cpp
@@ -79,7 +79,7 @@ namespace NUri {
FldTrySet(FieldHost, host);
FldTrySet(FieldPath, path);
FldTrySet(FieldQuery, query);
-
+
Rewrite();
}
@@ -102,7 +102,7 @@ namespace NUri {
default:
break;
}
-
+
if (!value.IsInited()) {
FldClr(field);
return false;
@@ -177,10 +177,10 @@ namespace NUri {
out << '\0';
}
Buffer = std::move(newbuf);
- }
+ }
CheckMissingFields();
-
+
FieldsDirty = 0;
}
@@ -191,8 +191,8 @@ namespace NUri {
if (GetSchemeInfo().FldReq & FlagPath)
// ... set path
FldSetNoDirty(FieldPath, TStringBuf("/"));
- }
-
+ }
+
/********************************************************/
void TUri::Merge(const TUri& base, int correctAbs) {
if (base.Scheme == SchemeUnknown)
@@ -200,7 +200,7 @@ namespace NUri {
if (!base.IsValidGlobal())
return;
-
+
const TStringBuf& selfscheme = GetField(FieldScheme);
// basescheme is present since IsValidGlobal() succeeded
const TStringBuf& basescheme = base.GetField(FieldScheme);
@@ -225,11 +225,11 @@ namespace NUri {
if (!IsNull(FlagHost))
break; // no merge
-
+
FldTrySet(FieldHost, base);
FldChkSet(FieldPort, base);
Port = base.Port;
-
+
if (noscheme && IsNull(FlagQuery) && IsNull(FlagPath))
FldTrySet(FieldQuery, base);
@@ -244,7 +244,7 @@ namespace NUri {
TStringBuf p0 = base.GetField(FieldPath);
if (!p0.IsInited())
p0 = rootPath;
-
+
TStringBuf p1 = GetField(FieldPath);
if (!p1.IsInited()) {
if (p0.data() != rootPath.data())
@@ -255,9 +255,9 @@ namespace NUri {
}
if (p1 && '/' == p1[0])
p1.Skip(1); // p0 will have one
-
+
bool pathop = true;
-
+
TTempBufOutput out(p0.length() + p1.length() + 4);
out << p0;
if ('/' != p0.back())
@@ -283,7 +283,7 @@ namespace NUri {
// rewrite only if borrowed fields from base
if (cleanFields & FieldsDirty)
RewriteImpl();
- }
+ }
/********************************************************/
TUri::TLinkType TUri::Normalize(const TUri& base,
@@ -293,7 +293,7 @@ namespace NUri {
return LinkIsBad;
const TStringBuf& host = GetHost();
-
+
// merge with base URL
// taken either from _BASE_ property or from optional argument
if (!codebase.empty()) {
@@ -307,27 +307,27 @@ namespace NUri {
// see SetProperty() for details
Merge(base);
}
-
+
// check result: must be correct absolute URL
if (!IsValidAbs())
return LinkBadAbs;
-
+
if (!host.empty()) {
// - we don't care about different ports for the same server
// - we don't care about win|www|koi|etc. preffixes for the same server
if (GetPort() != base.GetPort() || !EqualNoCase(host, base.GetHost()))
return LinkIsGlobal;
}
-
+
// find out if it is link to itself then ignore it
if (!Compare(base, FlagPath | FlagQuery))
return LinkIsFragment;
-
+
return LinkIsLocal;
- }
-
+ }
+
/********************************************************/
-
+
size_t TUri::PrintSize(ui32 flags) const {
size_t len = 10;
flags &= FieldsSet; // can't output what we don't have
@@ -344,11 +344,11 @@ namespace NUri {
len += v.length() + 1;
}
}
- }
+ }
return len;
- }
-
+ }
+
IOutputStream& TUri::PrintImpl(IOutputStream& out, int flags) const {
TStringBuf v;
@@ -356,7 +356,7 @@ namespace NUri {
flags &= FieldsSet; // can't print what we don't have
if (flags & FlagHostAscii)
flags |= FlagHost; // to make host checks simpler below
-
+
if (flags & FlagScheme) {
v = Fields[FieldScheme];
if (!v.empty())
@@ -369,7 +369,7 @@ namespace NUri {
flags & FlagHostAscii ? FieldHostAscii : FieldHost;
host = Fields[fldhost];
}
-
+
TStringBuf port;
if ((flags & FlagPort) && 0 != Port && Port != DefaultPort)
port = Fields[FieldPort];
@@ -394,16 +394,16 @@ namespace NUri {
}
out << '@';
- }
-
+ }
+
out << host;
-
+
if (port)
out << ':';
- }
+ }
if (port)
out << port;
-
+
if (flags & FlagPath) {
v = Fields[FieldPath];
// for relative, empty path is not the same as missing
@@ -411,22 +411,22 @@ namespace NUri {
v = TStringBuf(".");
out << v;
}
-
+
if (flags & FlagQuery) {
v = Fields[FieldQuery];
if (v.IsInited())
out << '?' << v;
}
-
+
if (flags & FlagFrag) {
v = Fields[FieldFrag];
if (v.IsInited())
out << '#' << v;
}
-
+
return out;
- }
-
+ }
+
/********************************************************/
int TUri::CompareField(EField fld, const TUri& url) const {
const TStringBuf& v0 = GetField(fld);
@@ -438,8 +438,8 @@ namespace NUri {
default:
return v0.compare(v1);
}
- }
-
+ }
+
/********************************************************/
int TUri::Compare(const TUri& url, int flags) const {
// first compare fields with default values
@@ -449,14 +449,14 @@ namespace NUri {
return ret;
flags &= ~FlagPort;
}
-
+
// compare remaining sets of available fields
const int rtflags = flags & url.FieldsSet;
flags &= FieldsSet;
const int fldcmp = flags - rtflags;
if (fldcmp)
return fldcmp;
-
+
// field sets are the same, compare the fields themselves
for (int i = 0; i < FieldAllMAX; ++i) {
const EField fld = EField(i);
@@ -468,8 +468,8 @@ namespace NUri {
}
return 0;
- }
-
+ }
+
/********************************************************/
bool TUri::PathOperation(char*& pathPtr, char*& pathEnd, int correctAbs) {
if (!pathPtr)
@@ -479,22 +479,22 @@ namespace NUri {
if ((pathEnd - pathPtr) >= 2 && *(pathEnd - 2) == '/' && *(pathEnd - 1) == '.') {
--pathEnd;
- }
+ }
char* p_wr = pathEnd;
int upCount = 0;
-
+
char* p_prev = pathEnd;
Y_ASSERT(p_prev > pathPtr);
while (p_prev > pathPtr && *(p_prev - 1) == '/')
p_prev--;
-
+
for (char* p_rd = p_prev; p_rd; p_rd = p_prev) {
Y_ASSERT(p_rd == pathEnd || p_rd[0] == '/');
p_prev = nullptr;
char* p = p_rd;
-
+
if (p > pathPtr) {
for (p--; *p != '/'; p--) {
if (p == pathPtr)
@@ -513,10 +513,10 @@ namespace NUri {
}
}
}
-
+
Y_ASSERT(p_prev == nullptr || p_prev[0] == '/');
//and the first symbol !='/' after p_prev is p
-
+
if (p == p_rd) {
//empty block:
if (p_prev) { //either tail:
@@ -525,9 +525,9 @@ namespace NUri {
continue;
} else { //or head of abs path
*(--p_wr) = '/';
- break;
+ break;
}
- }
+ }
if (p[0] == '.') {
if (p + 1 == p_rd) {
@@ -541,14 +541,14 @@ namespace NUri {
continue;
}
}
- }
-
+ }
+
if (upCount) {
//unregister "../" and not print
upCount--;
- continue;
- }
-
+ continue;
+ }
+
// print
Y_ASSERT(p < p_rd);
Y_ASSERT(!p_prev || *(p - 1) == '/');
@@ -558,9 +558,9 @@ namespace NUri {
int l = p_rd - p + 1;
p_wr -= l;
memmove(p_wr, p, l);
- }
- }
-
+ }
+ }
+
if (upCount) {
if (*pathPtr != '/') {
if (pathEnd == p_wr && *(p_wr - 1) == '.') {
@@ -570,9 +570,9 @@ namespace NUri {
}
for (; upCount > 0; upCount--) {
*(--p_wr) = '/';
- *(--p_wr) = '.';
- *(--p_wr) = '.';
- }
+ *(--p_wr) = '.';
+ *(--p_wr) = '.';
+ }
} else {
if (correctAbs > 0)
return false;
@@ -589,17 +589,17 @@ namespace NUri {
} else {
upCount = false;
}
- }
- }
-
+ }
+ }
+
Y_ASSERT(p_wr >= pathPtr);
-
+
if (upCount)
return false;
pathPtr = p_wr;
return true;
}
-
+
/********************************************************/
const char* LinkTypeToString(const TUri::TLinkType& t) {
switch (t) {
diff --git a/library/cpp/uri/uri.h b/library/cpp/uri/uri.h
index 3b6c19fe4a..398d15a975 100644
--- a/library/cpp/uri/uri.h
+++ b/library/cpp/uri/uri.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "common.h"
#include "encode.h"
@@ -13,7 +13,7 @@
#include <util/stream/output.h>
#include <util/stream/str.h>
#include <util/system/yassert.h>
-
+
#include <cstdlib>
namespace NUri {
@@ -41,7 +41,7 @@ namespace NUri {
TScheme::EKind Scheme;
/// contains fields out of buffer (and possibly not null-terminated)
ui32 FieldsDirty;
-
+
private:
void Alloc(size_t len) {
Dealloc(); // to prevent copy below
@@ -50,7 +50,7 @@ namespace NUri {
void Dealloc() {
Buffer.Clear();
}
-
+
void ClearImpl() {
Port = 0;
FieldsSet = 0;
@@ -114,7 +114,7 @@ namespace NUri {
FldSetNoDirty(fld, value);
FldMarkDirty(fld);
}
-
+
const TStringBuf& FldGet(EField fld) const {
return Fields[fld];
}
@@ -268,18 +268,18 @@ namespace NUri {
{
CopyImpl(url);
}
-
+
~TUri() {
Clear();
}
-
+
void Copy(const TUri& url) {
if (&url != this) {
CopyData(url);
CopyImpl(url);
}
}
-
+
void Clear() {
Dealloc();
ClearImpl();
@@ -288,7 +288,7 @@ namespace NUri {
ui32 GetFieldMask() const {
return FieldsSet;
}
-
+
ui32 GetUrlFieldMask() const {
return GetFieldMask() & FlagUrlFields;
}
@@ -305,10 +305,10 @@ namespace NUri {
if (FldIsDirty())
RewriteImpl();
}
-
+
private:
TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty);
-
+
TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8);
public:
@@ -353,16 +353,16 @@ namespace NUri {
// -1 - ignore portions
void Merge(const TUri& base, int correctAbs = -1);
-
+
TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), long careFlags = FeaturesDefault, ECharset enc = CODES_UTF8);
-
+
private:
int PrintFlags(int flags) const {
if (0 == (FlagUrlFields & flags))
flags |= FlagUrlFields;
return flags;
}
-
+
protected:
size_t PrintSize(ui32 flags) const;
@@ -421,7 +421,7 @@ namespace NUri {
Print(str, flags);
return str;
}
-
+
// Only non-default scheme and port are printed
char* PrintHost(char* str, size_t size) const {
return Print(str, size, (Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort);
@@ -434,7 +434,7 @@ namespace NUri {
int Compare(const TUri& A, int flags = FlagUrlFields) const;
int CompareField(EField fld, const TUri& url) const;
-
+
const TStringBuf& GetField(EField fld) const {
return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>();
}
@@ -442,7 +442,7 @@ namespace NUri {
ui16 GetPort() const {
return 0 == Port ? DefaultPort : Port;
}
-
+
const TStringBuf& GetHost() const {
if (GetFieldMask() & FlagHostAscii)
return FldGet(FieldHostAscii);
@@ -450,7 +450,7 @@ namespace NUri {
return FldGet(FieldHost);
return Default<TStringBuf>();
}
-
+
bool UseHostAscii() {
return FldMov(FieldHostAscii, FieldHost);
}
@@ -465,11 +465,11 @@ namespace NUri {
bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const {
return !FldSetCmp(flags);
}
-
+
bool IsNull(EField fld) const {
return !FldIsSet(fld);
}
-
+
bool IsValidAbs() const {
if (IsNull(FlagScheme | FlagHost | FlagPath))
return false;
@@ -483,11 +483,11 @@ namespace NUri {
return true;
return IsAbsPathImpl();
}
-
+
bool IsRootless() const {
return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl();
}
-
+
// for RFC 2396 compatibility
bool IsOpaque() const {
return IsRootless();
@@ -502,33 +502,33 @@ namespace NUri {
bool operator!() const {
return IsNull();
}
-
+
bool Equal(const TUri& A, int flags = FlagUrlFields) const {
return (Compare(A, flags) == 0);
}
-
+
bool Less(const TUri& A, int flags = FlagUrlFields) const {
return (Compare(A, flags) < 0);
}
-
+
bool operator==(const TUri& A) const {
return Equal(A, FlagNoFrag);
}
-
+
bool operator!=(const TUri& A) const {
return !Equal(A, FlagNoFrag);
}
-
+
bool operator<(const TUri& A) const {
return Less(A, FlagNoFrag);
}
-
+
bool IsSameDocument(const TUri& other) const {
// pre: both *this and 'other' should be normalized to valid abs
Y_ASSERT(IsValidAbs());
return Equal(other, FlagNoFrag);
}
-
+
bool IsLocal(const TUri& other) const {
// pre: both *this and 'other' should be normalized to valid abs
Y_ASSERT(IsValidAbs() && other.IsValidAbs());
@@ -550,7 +550,7 @@ namespace NUri {
static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, long srcflags, EField dstfld, long dstflags) {
return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld));
}
-
+
static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, long flags = FeaturesEncodeDecode) {
return ReEncodeField(out, val, FieldAllMAX, flags);
}
@@ -559,7 +559,7 @@ namespace NUri {
return flags & FeaturePathDenyRootParent ? 1
: flags & FeaturePathStripRootParent ? -1 : 0;
}
-
+
static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs);
private:
@@ -588,7 +588,7 @@ namespace NUri {
class TUriUpdate {
TUri& Uri_;
-
+
public:
TUriUpdate(TUri& uri)
: Uri_(uri)