diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/uri | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/uri')
-rw-r--r-- | library/cpp/uri/assign.cpp | 652 | ||||
-rw-r--r-- | library/cpp/uri/common.cpp | 184 | ||||
-rw-r--r-- | library/cpp/uri/common.h | 790 | ||||
-rw-r--r-- | library/cpp/uri/encode.cpp | 386 | ||||
-rw-r--r-- | library/cpp/uri/encode.h | 534 | ||||
-rw-r--r-- | library/cpp/uri/http_url.h | 38 | ||||
-rw-r--r-- | library/cpp/uri/location.cpp | 44 | ||||
-rw-r--r-- | library/cpp/uri/location.h | 4 | ||||
-rw-r--r-- | library/cpp/uri/location_ut.cpp | 20 | ||||
-rw-r--r-- | library/cpp/uri/other.cpp | 20 | ||||
-rw-r--r-- | library/cpp/uri/other.h | 12 | ||||
-rw-r--r-- | library/cpp/uri/parse.cpp | 336 | ||||
-rw-r--r-- | library/cpp/uri/parse.h | 634 | ||||
-rw-r--r-- | library/cpp/uri/qargs.cpp | 480 | ||||
-rw-r--r-- | library/cpp/uri/qargs.h | 22 | ||||
-rw-r--r-- | library/cpp/uri/uri-ru_ut.cpp | 190 | ||||
-rw-r--r-- | library/cpp/uri/uri.cpp | 1004 | ||||
-rw-r--r-- | library/cpp/uri/uri.h | 1152 | ||||
-rw-r--r-- | library/cpp/uri/uri_ut.cpp | 1718 | ||||
-rw-r--r-- | library/cpp/uri/uri_ut.h | 116 | ||||
-rw-r--r-- | library/cpp/uri/ut/ya.make | 4 | ||||
-rw-r--r-- | library/cpp/uri/ya.make | 8 |
22 files changed, 4174 insertions, 4174 deletions
diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp index 25b663b636..ae9125c727 100644 --- a/library/cpp/uri/assign.cpp +++ b/library/cpp/uri/assign.cpp @@ -8,418 +8,418 @@ #include <util/memory/tempbuf.h> #include <util/string/cast.h> #include <util/system/yassert.h> -#include <util/system/sys_alloc.h> +#include <util/system/sys_alloc.h> namespace NUri { - TMallocPtr<char> TUri::IDNToAscii(const wchar32* idna) { - // XXX: don't use punycode_encode directly as it doesn't include - // proper stringprep and splitting on dot-equivalent characters - char* buf; - static_assert(sizeof(*idna) == sizeof(ui32), "fixme"); - if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*)idna, &buf, 0)) - buf = nullptr; - return buf; - } - - TMallocPtr<char> TUri::IDNToAscii(const TStringBuf& host, ECharset enc) { - TTempBuf buf(sizeof(wchar32) * (1 + host.length())); - wchar32* wbuf = reinterpret_cast<wchar32*>(buf.Data()); - - const size_t written = NDetail::NBaseOps::Recode(host, wbuf, enc).length(); - wbuf[written] = 0; - - return IDNToAscii(wbuf); - } - - TStringBuf TUri::HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc) { - TStringBuf outhost; // store the result here before returning it, to get RVO - - size_t buflen = 0; - - if (hasExtended && !allowIDN) - return outhost; // definitely can't convert - - // charset-recode: RFC 3986, 3.2.2, requires percent-encoded non-ASCII - // chars in reg-name to be UTF-8 so convert to UTF-8 prior to decoding - const bool recoding = CODES_UTF8 != enc && hasExtended; - if (recoding) { - size_t nrd, nwr; - buflen = host.length() * 4; - buf.Reset(static_cast<char*>(y_allocate(buflen))); - if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr)) - return outhost; - host = TStringBuf(buf.Get(), nwr); - } - - // percent-decode - if (0 == buflen) { - buflen = host.length(); - buf.Reset(static_cast<char*>(y_allocate(buflen))); - } - // decoding shortens so writing over host in buf is OK - TMemoryWriteBuffer out(buf.Get(), buflen); - TEncoder decoder(out, FeatureDecodeANY | FeatureToLower); - const long outFlags = decoder.ReEncode(host); - hasExtended = 0 != (outFlags & FeatureEncodeExtendedASCII); - - // check again - if (hasExtended && !allowIDN) - return outhost; + TMallocPtr<char> TUri::IDNToAscii(const wchar32* idna) { + // XXX: don't use punycode_encode directly as it doesn't include + // proper stringprep and splitting on dot-equivalent characters + char* buf; + static_assert(sizeof(*idna) == sizeof(ui32), "fixme"); + if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*)idna, &buf, 0)) + buf = nullptr; + return buf; + } + + TMallocPtr<char> TUri::IDNToAscii(const TStringBuf& host, ECharset enc) { + TTempBuf buf(sizeof(wchar32) * (1 + host.length())); + wchar32* wbuf = reinterpret_cast<wchar32*>(buf.Data()); + + const size_t written = NDetail::NBaseOps::Recode(host, wbuf, enc).length(); + wbuf[written] = 0; - host = out.Str(); - - // convert to punycode if needed - if (!hasExtended) { - outhost = host; - return outhost; - } - - TMallocPtr<char> puny; - try { - puny = IDNToAscii(host); - } catch (const yexception& /* exc */) { - } - - if (!puny) { - // XXX: try user charset unless UTF8 or converted to it - if (CODES_UTF8 == enc || recoding) - return outhost; - try { - puny = IDNToAscii(host, enc); - } catch (const yexception& /* exc */) { - return outhost; - } - if (!puny) - return outhost; - } - - buf = puny; - outhost = buf.Get(); - - return outhost; + return IDNToAscii(wbuf); } - TStringBuf TUri::HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc) { - // find what we have - long haveFlags = 0; - for (size_t i = 0; i != host.length(); ++i) - haveFlags |= TEncoder::GetFlags(host[i]).FeatFlags; + TStringBuf TUri::HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc) { + TStringBuf outhost; // store the result here before returning it, to get RVO + + size_t buflen = 0; + + if (hasExtended && !allowIDN) + return outhost; // definitely can't convert + + // charset-recode: RFC 3986, 3.2.2, requires percent-encoded non-ASCII + // chars in reg-name to be UTF-8 so convert to UTF-8 prior to decoding + const bool recoding = CODES_UTF8 != enc && hasExtended; + if (recoding) { + size_t nrd, nwr; + buflen = host.length() * 4; + buf.Reset(static_cast<char*>(y_allocate(buflen))); + if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr)) + return outhost; + host = TStringBuf(buf.Get(), nwr); + } - // interested in encoded characters or (if IDN is allowed) extended ascii - TStringBuf outhost; - const bool haveExtended = haveFlags & FeatureEncodeExtendedASCII; + // percent-decode + if (0 == buflen) { + buflen = host.length(); + buf.Reset(static_cast<char*>(y_allocate(buflen))); + } + // decoding shortens so writing over host in buf is OK + TMemoryWriteBuffer out(buf.Get(), buflen); + TEncoder decoder(out, FeatureDecodeANY | FeatureToLower); + const long outFlags = decoder.ReEncode(host); + hasExtended = 0 != (outFlags & FeatureEncodeExtendedASCII); + + // check again + if (hasExtended && !allowIDN) + return outhost; - if (!haveExtended || allowIDN) { - if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) - outhost = host; - else - outhost = HostToAscii(host, buf, haveExtended, allowIDN, enc); - } + host = out.Str(); - return outhost; - } + // convert to punycode if needed + if (!hasExtended) { + outhost = host; + return outhost; + } + + TMallocPtr<char> puny; + try { + puny = IDNToAscii(host); + } catch (const yexception& /* exc */) { + } - static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField fld, const TStringBuf& val, long flags) { + if (!puny) { + // XXX: try user charset unless UTF8 or converted to it + if (CODES_UTF8 == enc || recoding) + return outhost; + try { + puny = IDNToAscii(host, enc); + } catch (const yexception& /* exc */) { + return outhost; + } + if (!puny) + return outhost; + } + + buf = puny; + outhost = buf.Get(); + + return outhost; + } + + TStringBuf TUri::HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc) { + // find what we have + long haveFlags = 0; + for (size_t i = 0; i != host.length(); ++i) + haveFlags |= TEncoder::GetFlags(host[i]).FeatFlags; + + // interested in encoded characters or (if IDN is allowed) extended ascii + TStringBuf outhost; + const bool haveExtended = haveFlags & FeatureEncodeExtendedASCII; + + if (!haveExtended || allowIDN) { + if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) + outhost = host; + else + outhost = HostToAscii(host, buf, haveExtended, allowIDN, enc); + } + + return outhost; + } + + static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField fld, const TStringBuf& val, long flags) { if (val.empty()) - return false; - if (flags & TFeature::FeaturesAllEncoder) - TUri::ReEncodeField(out, val, fld, flags); + return false; + if (flags & TFeature::FeaturesAllEncoder) + TUri::ReEncodeField(out, val, fld, flags); else - out << val; - return true; + out << val; + return true; } - TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defscheme) { - Clear(); + TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defscheme) { + Clear(); - TState::EParsed ret = parser.State; - if (ParsedBadFormat <= ret) - return ret; + TState::EParsed ret = parser.State; + if (ParsedBadFormat <= ret) + return ret; - const TSection& scheme = parser.Get(FieldScheme); - const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme); + const TSection& scheme = parser.Get(FieldScheme); + const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme); - // set the scheme always if available + // set the scheme always if available if (schemeInfo.Str.empty() && scheme.IsSet()) - FldSet(FieldScheme, scheme.Get()); + FldSet(FieldScheme, scheme.Get()); - if (ParsedOK != ret) - return ret; + if (ParsedOK != ret) + return ret; - size_t buflen = 0; + size_t buflen = 0; - // special processing for fields + // special processing for fields - const bool convertIDN = parser.Flags & FeatureConvertHostIDN; - long flags = parser.Flags.Allow; - if (convertIDN) - flags |= FeatureAllowHostIDN | FeatureCheckHost; + const bool convertIDN = parser.Flags & FeatureConvertHostIDN; + long flags = parser.Flags.Allow; + if (convertIDN) + flags |= FeatureAllowHostIDN | FeatureCheckHost; - // process non-ASCII host for punycode + // process non-ASCII host for punycode - TMallocPtr<char> hostptr; - TStringBuf hostascii; // empty: use host field; non-empty: ascii - bool hostConverted = false; // hostascii is empty or the original - const TSection& host = parser.Get(FieldHost); - if (host.IsSet() && !FldIsSet(FieldHost)) { - const bool allowIDN = (flags & FeatureAllowHostIDN); - const TStringBuf hostbuf = host.Get(); + TMallocPtr<char> hostptr; + TStringBuf hostascii; // empty: use host field; non-empty: ascii + bool hostConverted = false; // hostascii is empty or the original + const TSection& host = parser.Get(FieldHost); + if (host.IsSet() && !FldIsSet(FieldHost)) { + const bool allowIDN = (flags & FeatureAllowHostIDN); + const TStringBuf hostbuf = host.Get(); - // if we know we have and allow extended-ASCII chars, no need to check further - if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) - hostascii = HostToAscii(hostbuf, hostptr, true, true, parser.Enc); - else - hostascii = HostToAscii(hostbuf, hostptr, allowIDN, parser.Enc); + // if we know we have and allow extended-ASCII chars, no need to check further + if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) + hostascii = HostToAscii(hostbuf, hostptr, true, true, parser.Enc); + else + hostascii = HostToAscii(hostbuf, hostptr, allowIDN, parser.Enc); if (hostascii.empty()) - ret = ParsedBadHost; // exists but cannot be converted - else if (hostbuf.data() != hostascii.data()) { - hostConverted = true; - buflen += 1 + hostascii.length(); - if (convertIDN) - FldMarkSet(FieldHost); // so that we don't process host below - } - } - - // add unprocessed fields - - for (int idx = 0; idx < FieldUrlMAX; ++idx) { - const EField fld = EField(idx); - const TSection& section = parser.Get(fld); - if (section.IsSet() && !FldIsSet(fld)) - buflen += 1 + section.EncodedLen(); // includes null + ret = ParsedBadHost; // exists but cannot be converted + else if (hostbuf.data() != hostascii.data()) { + hostConverted = true; + buflen += 1 + hostascii.length(); + if (convertIDN) + FldMarkSet(FieldHost); // so that we don't process host below + } } - if (0 == buflen) // no more sections set? - return ret; - // process #! fragments - // https://developers.google.com/webmasters/ajax-crawling/docs/specification + // add unprocessed fields + + for (int idx = 0; idx < FieldUrlMAX; ++idx) { + const EField fld = EField(idx); + const TSection& section = parser.Get(fld); + if (section.IsSet() && !FldIsSet(fld)) + buflen += 1 + section.EncodedLen(); // includes null + } + if (0 == buflen) // no more sections set? + return ret; + + // process #! fragments + // https://developers.google.com/webmasters/ajax-crawling/docs/specification static const TStringBuf escFragPrefix(TStringBuf("_escaped_fragment_=")); - bool encHashBangFrag = false; - TStringBuf qryBeforeEscapedFragment; - TStringBuf qryEscapedFragment; - do { - if (FldIsSet(FieldFrag) || FldIsSet(FieldQuery)) - break; - - const TSection& frag = parser.Get(FieldFrag); - if (frag.IsSet()) { - if (0 == (parser.Flags & FeatureHashBangToEscapedFragment)) - break; - const TStringBuf fragbuf = frag.Get(); + bool encHashBangFrag = false; + TStringBuf qryBeforeEscapedFragment; + TStringBuf qryEscapedFragment; + do { + if (FldIsSet(FieldFrag) || FldIsSet(FieldQuery)) + break; + + const TSection& frag = parser.Get(FieldFrag); + if (frag.IsSet()) { + if (0 == (parser.Flags & FeatureHashBangToEscapedFragment)) + break; + const TStringBuf fragbuf = frag.Get(); if (fragbuf.empty() || '!' != fragbuf[0]) - break; - encHashBangFrag = true; - // '!' will make space for '&' or '\0' if needed - buflen += escFragPrefix.length(); - buflen += 2 * fragbuf.length(); // we don't know how many will be encoded - } else { - const TSection& qry = parser.Get(FieldQuery); - if (!qry.IsSet()) - break; - // FeatureHashBangToEscapedFragment has preference - if (FeatureEscapedToHashBangFragment != (parser.Flags & FeaturesEscapedFragment)) - break; - qry.Get().RSplit('&', qryBeforeEscapedFragment, qryEscapedFragment); - if (!qryEscapedFragment.StartsWith(escFragPrefix)) { - qryEscapedFragment.Clear(); - break; - } - qryEscapedFragment.Skip(escFragPrefix.length()); - buflen += 2; // for '!' and '\0' in fragment - buflen -= escFragPrefix.length(); - } - } while (false); - - // now set all fields prior to validating + break; + encHashBangFrag = true; + // '!' will make space for '&' or '\0' if needed + buflen += escFragPrefix.length(); + buflen += 2 * fragbuf.length(); // we don't know how many will be encoded + } else { + const TSection& qry = parser.Get(FieldQuery); + if (!qry.IsSet()) + break; + // FeatureHashBangToEscapedFragment has preference + if (FeatureEscapedToHashBangFragment != (parser.Flags & FeaturesEscapedFragment)) + break; + qry.Get().RSplit('&', qryBeforeEscapedFragment, qryEscapedFragment); + if (!qryEscapedFragment.StartsWith(escFragPrefix)) { + qryEscapedFragment.Clear(); + break; + } + qryEscapedFragment.Skip(escFragPrefix.length()); + buflen += 2; // for '!' and '\0' in fragment + buflen -= escFragPrefix.length(); + } + } while (false); + + // now set all fields prior to validating Alloc(buflen); TMemoryWriteBuffer out(Buffer.data(), Buffer.size()); - for (int idx = 0; idx < FieldUrlMAX; ++idx) { - const EField fld = EField(idx); + for (int idx = 0; idx < FieldUrlMAX; ++idx) { + const EField fld = EField(idx); - const TSection& section = parser.Get(fld); - if (!section.IsSet() || FldIsSet(fld)) - continue; + const TSection& section = parser.Get(fld); + if (!section.IsSet() || FldIsSet(fld)) + continue; - if (FieldQuery == fld && encHashBangFrag) - continue; + if (FieldQuery == fld && encHashBangFrag) + continue; - if (FieldFrag == fld && qryEscapedFragment.IsInited()) - continue; + if (FieldFrag == fld && qryEscapedFragment.IsInited()) + continue; - char* beg = out.Buf(); - TStringBuf val = section.Get(); - long careFlags = section.GetFlagsEncode(); + char* beg = out.Buf(); + TStringBuf val = section.Get(); + long careFlags = section.GetFlagsEncode(); - switch (fld) { - default: - break; + switch (fld) { + default: + break; - case FieldQuery: - if (qryEscapedFragment.IsInited()) { - const EField dstfld = FieldFrag; // that's where we will store - out << '!'; + case FieldQuery: + if (qryEscapedFragment.IsInited()) { + const EField dstfld = FieldFrag; // that's where we will store + out << '!'; if (!qryEscapedFragment.empty()) - ReEncodeToField(out, qryEscapedFragment, fld, FeatureDecodeANY | careFlags, dstfld, FeatureDecodeANY | parser.GetFieldFlags(dstfld)); - FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); + ReEncodeToField(out, qryEscapedFragment, fld, FeatureDecodeANY | careFlags, dstfld, FeatureDecodeANY | parser.GetFieldFlags(dstfld)); + FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); if (qryBeforeEscapedFragment.empty()) - continue; - out << '\0'; - beg = out.Buf(); - val = qryBeforeEscapedFragment; - } - break; - - case FieldFrag: - if (encHashBangFrag) { - const EField dstfld = FieldQuery; // that's where we will store - const TSection& qry = parser.Get(dstfld); - if (qry.IsSet()) - if (AppendField(out, dstfld, qry.Get(), qry.GetFlagsEncode())) - out << '&'; - out << escFragPrefix; - val.Skip(1); // skip '!' - ReEncodeToField(out, val, fld, careFlags, dstfld, parser.GetFieldFlags(dstfld)); - FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); - continue; - } - break; - } - - AppendField(out, fld, val, careFlags); - char* end = out.Buf(); - - if (careFlags & FeaturePathOperation) { - if (!PathOperation(beg, end, PathOperationFlag(parser.Flags))) - return ParsedBadPath; - - Y_ASSERT(beg >= out.Beg()); - out.SetPos(end); + continue; + out << '\0'; + beg = out.Buf(); + val = qryBeforeEscapedFragment; + } + break; + + case FieldFrag: + if (encHashBangFrag) { + const EField dstfld = FieldQuery; // that's where we will store + const TSection& qry = parser.Get(dstfld); + if (qry.IsSet()) + if (AppendField(out, dstfld, qry.Get(), qry.GetFlagsEncode())) + out << '&'; + out << escFragPrefix; + val.Skip(1); // skip '!' + ReEncodeToField(out, val, fld, careFlags, dstfld, parser.GetFieldFlags(dstfld)); + FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); + continue; + } + break; } - FldSetNoDirty(fld, TStringBuf(beg, end)); - - // special character case - const long checkChars = section.GetFlagsAllPlaintext() & FeaturesCheckSpecialChar; - if (0 != checkChars) { // has unencoded special chars: check permission - const long allowChars = parser.GetFieldFlags(fld) & checkChars; - if (checkChars != allowChars) - ret = ParsedBadFormat; + AppendField(out, fld, val, careFlags); + char* end = out.Buf(); + + if (careFlags & FeaturePathOperation) { + if (!PathOperation(beg, end, PathOperationFlag(parser.Flags))) + return ParsedBadPath; + + Y_ASSERT(beg >= out.Beg()); + out.SetPos(end); + } + + FldSetNoDirty(fld, TStringBuf(beg, end)); + + // special character case + const long checkChars = section.GetFlagsAllPlaintext() & FeaturesCheckSpecialChar; + if (0 != checkChars) { // has unencoded special chars: check permission + const long allowChars = parser.GetFieldFlags(fld) & checkChars; + if (checkChars != allowChars) + ret = ParsedBadFormat; } out << '\0'; } - if (hostConverted) { - char* beg = out.Buf(); - out << hostascii; - char* end = out.Buf(); - const EField fld = convertIDN ? FieldHost : FieldHostAscii; - FldSetNoDirty(fld, TStringBuf(beg, end)); + if (hostConverted) { + char* beg = out.Buf(); + out << hostascii; + char* end = out.Buf(); + const EField fld = convertIDN ? FieldHost : FieldHostAscii; + FldSetNoDirty(fld, TStringBuf(beg, end)); out << '\0'; - } + } Buffer.Resize(out.Len()); - if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) { - if (SchemeUnknown == defscheme) - ret = ParsedBadScheme; - else - SetSchemeImpl(defscheme); + if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) { + if (SchemeUnknown == defscheme) + ret = ParsedBadScheme; + else + SetSchemeImpl(defscheme); } - if (0 == (parser.Flags & FeatureAllowEmptyPath)) - CheckMissingFields(); + if (0 == (parser.Flags & FeatureAllowEmptyPath)) + CheckMissingFields(); - const TStringBuf& port = GetField(FieldPort); + const TStringBuf& port = GetField(FieldPort); if (!port.empty()) { - if (!TryFromString<ui16>(port, Port)) - ret = ParsedBadPort; + if (!TryFromString<ui16>(port, Port)) + ret = ParsedBadPort; } - if (ParsedOK != ret) - return ret; + if (ParsedOK != ret) + return ret; - // run validity checks now that all fields are set + // run validity checks now that all fields are set - // check the host for DNS compliance - do { - if (0 == (flags & FeatureCheckHost)) - break; + // check the host for DNS compliance + do { + if (0 == (flags & FeatureCheckHost)) + break; if (hostascii.empty()) - hostascii = GetField(FieldHost); + hostascii = GetField(FieldHost); if (hostascii.empty()) - break; - // IP literal - if ('[' == hostascii[0] && ']' == hostascii.back()) - break; - ret = CheckHost(hostascii); - if (ParsedOK != ret) - return ret; - } while (false); - - return ret; + break; + // IP literal + if ('[' == hostascii[0] && ']' == hostascii.back()) + break; + ret = CheckHost(hostascii); + if (ParsedOK != ret) + return ret; + } while (false); + + return ret; } - TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { - Clear(); + TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { + Clear(); if (url.empty()) - return ParsedEmpty; + return ParsedEmpty; - if (maxlen > 0 && url.length() > maxlen) - return ParsedTooLong; + if (maxlen > 0 && url.length() > maxlen) + return ParsedTooLong; - const TParser parser(flags, url, enc); + const TParser parser(flags, url, enc); - return AssignImpl(parser, defscheme); - } - - TState::EParsed TUri::Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& url_base, ui32 maxlen, ECharset enc) { - const TParseFlags flags1 = flags.Exclude(FeatureNoRelPath); + return AssignImpl(parser, defscheme); + } + + TState::EParsed TUri::Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& url_base, ui32 maxlen, ECharset enc) { + const TParseFlags flags1 = flags.Exclude(FeatureNoRelPath); TState::EParsed ret = ParseImpl(url, url_base.empty() ? flags : flags1, maxlen, SchemeEmpty, enc); if (ParsedOK != ret) return ret; if (!url_base.empty() && !IsValidAbs()) { - TUri base; - ret = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; - Merge(base, PathOperationFlag(flags)); - } - - Rewrite(); + TUri base; + ret = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc); + if (ParsedOK != ret) + return ret; + Merge(base, PathOperationFlag(flags)); + } + + Rewrite(); return ret; - } + } - TState::EParsed TUri::Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags, ui32 maxlen, ECharset enc) { - const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); + TState::EParsed TUri::Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags, ui32 maxlen, ECharset enc) { + const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); if (ParsedOK != ret) return ret; - if (!IsValidAbs()) - Merge(base, PathOperationFlag(flags)); + if (!IsValidAbs()) + Merge(base, PathOperationFlag(flags)); - Rewrite(); + Rewrite(); return ret; - } + } - TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { - const TState::EParsed ret = ParseImpl( - url, flags | FeatureNoRelPath, maxlen, defscheme, enc); - if (ParsedOK != ret) - return ret; + TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { + const TState::EParsed ret = ParseImpl( + url, flags | FeatureNoRelPath, maxlen, defscheme, enc); + if (ParsedOK != ret) + return ret; - if (IsNull(FlagHost)) - return ParsedBadHost; + if (IsNull(FlagHost)) + return ParsedBadHost; - Rewrite(); - return ParsedOK; - } + Rewrite(); + return ParsedOK; + } } diff --git a/library/cpp/uri/common.cpp b/library/cpp/uri/common.cpp index f0419c6ae9..05af1e57d1 100644 --- a/library/cpp/uri/common.cpp +++ b/library/cpp/uri/common.cpp @@ -1,115 +1,115 @@ #include "common.h" - + #include <util/generic/map.h> -#include <util/generic/singleton.h> - +#include <util/generic/singleton.h> + namespace NUri { - static_assert(TFeature::FeatureMAX <= sizeof(unsigned long) * 8, "expect TFeature::FeatureMAX <= sizeof(unsigned long) * 8"); + static_assert(TFeature::FeatureMAX <= sizeof(unsigned long) * 8, "expect TFeature::FeatureMAX <= sizeof(unsigned long) * 8"); - const TSchemeInfo TSchemeInfo::Registry[] = { - TSchemeInfo(TScheme::SchemeEmpty, TStringBuf()), // scheme is empty and inited + const TSchemeInfo TSchemeInfo::Registry[] = { + TSchemeInfo(TScheme::SchemeEmpty, TStringBuf()), // scheme is empty and inited TSchemeInfo(TScheme::SchemeHTTP, TStringBuf("http"), TField::FlagHost | TField::FlagPath, 80), TSchemeInfo(TScheme::SchemeHTTPS, TStringBuf("https"), TField::FlagHost | TField::FlagPath, 443), TSchemeInfo(TScheme::SchemeFTP, TStringBuf("ftp"), TField::FlagHost | TField::FlagPath, 20), TSchemeInfo(TScheme::SchemeFILE, TStringBuf("file"), TField::FlagPath), TSchemeInfo(TScheme::SchemeWS, TStringBuf("ws"), TField::FlagHost | TField::FlagPath, 80), TSchemeInfo(TScheme::SchemeWSS, TStringBuf("wss"), TField::FlagHost | TField::FlagPath, 443), - // add above - TSchemeInfo(TScheme::SchemeUnknown, TStringBuf()) // scheme is empty and uninited - }; + // add above + TSchemeInfo(TScheme::SchemeUnknown, TStringBuf()) // scheme is empty and uninited + }; + + namespace { + struct TLessNoCase { + bool operator()(const TStringBuf& lt, const TStringBuf& rt) const { + return 0 > CompareNoCase(lt, rt); + } + }; + + class TSchemeInfoMap { + typedef TMap<TStringBuf, TScheme::EKind, TLessNoCase> TdMap; + TdMap Map_; - namespace { - struct TLessNoCase { - bool operator()(const TStringBuf& lt, const TStringBuf& rt) const { - return 0 > CompareNoCase(lt, rt); - } - }; + public: + TSchemeInfoMap() { + for (int i = TScheme::SchemeEmpty; i < TScheme::SchemeUnknown; ++i) { + const TSchemeInfo& info = TSchemeInfo::Get(TScheme::EKind(i)); + Map_.insert(std::make_pair(info.Str, info.Kind)); + } + } - class TSchemeInfoMap { - typedef TMap<TStringBuf, TScheme::EKind, TLessNoCase> TdMap; - TdMap Map_; + TScheme::EKind Get(const TStringBuf& scheme) const { + const TdMap::const_iterator it = Map_.find(scheme); + return Map_.end() == it ? TScheme::SchemeUnknown : it->second; + } - public: - TSchemeInfoMap() { - for (int i = TScheme::SchemeEmpty; i < TScheme::SchemeUnknown; ++i) { - const TSchemeInfo& info = TSchemeInfo::Get(TScheme::EKind(i)); - Map_.insert(std::make_pair(info.Str, info.Kind)); - } - } + static const TSchemeInfoMap& Instance() { + return *Singleton<TSchemeInfoMap>(); + } + }; - TScheme::EKind Get(const TStringBuf& scheme) const { - const TdMap::const_iterator it = Map_.find(scheme); - return Map_.end() == it ? TScheme::SchemeUnknown : it->second; - } - - static const TSchemeInfoMap& Instance() { - return *Singleton<TSchemeInfoMap>(); - } - }; - } - - const TSchemeInfo& TSchemeInfo::Get(const TStringBuf& scheme) { - return Registry[TSchemeInfoMap::Instance().Get(scheme)]; + + const TSchemeInfo& TSchemeInfo::Get(const TStringBuf& scheme) { + return Registry[TSchemeInfoMap::Instance().Get(scheme)]; + } + + const char* ParsedStateToString(const TState::EParsed& t) { + switch (t) { + case TState::ParsedOK: + return "ParsedOK"; + case TState::ParsedEmpty: + return "ParsedEmpty"; + case TState::ParsedRootless: + return "ParsedRootless"; + case TState::ParsedBadFormat: + return "ParsedBadFormat"; + case TState::ParsedBadPath: + return "ParsedBadPath"; + case TState::ParsedTooLong: + return "ParsedTooLong"; + case TState::ParsedBadPort: + return "ParsedBadPort"; + case TState::ParsedBadAuth: + return "ParsedBadAuth"; + case TState::ParsedBadScheme: + return "ParsedBadScheme"; + case TState::ParsedBadHost: + return "ParsedBadHost"; + default: + return "Parsed[Unknown]"; + } } - - const char* ParsedStateToString(const TState::EParsed& t) { - switch (t) { - case TState::ParsedOK: - return "ParsedOK"; - case TState::ParsedEmpty: - return "ParsedEmpty"; - case TState::ParsedRootless: - return "ParsedRootless"; - case TState::ParsedBadFormat: - return "ParsedBadFormat"; - case TState::ParsedBadPath: - return "ParsedBadPath"; - case TState::ParsedTooLong: - return "ParsedTooLong"; - case TState::ParsedBadPort: - return "ParsedBadPort"; - case TState::ParsedBadAuth: - return "ParsedBadAuth"; - case TState::ParsedBadScheme: - return "ParsedBadScheme"; - case TState::ParsedBadHost: - return "ParsedBadHost"; - default: - return "Parsed[Unknown]"; - } - } - const char* FieldToString(const TField::EField& t) { - switch (t) { - case TField::FieldScheme: - return "scheme"; - case TField::FieldUser: - return "username"; - case TField::FieldPass: - return "password"; - case TField::FieldHost: - return "host"; - case TField::FieldHostAscii: - return "hostascii"; - case TField::FieldPort: - return "port"; - case TField::FieldPath: - return "path"; - case TField::FieldQuery: - return "query"; - case TField::FieldFrag: - return "fragment"; - default: - return "Field[Unknown]"; - } + const char* FieldToString(const TField::EField& t) { + switch (t) { + case TField::FieldScheme: + return "scheme"; + case TField::FieldUser: + return "username"; + case TField::FieldPass: + return "password"; + case TField::FieldHost: + return "host"; + case TField::FieldHostAscii: + return "hostascii"; + case TField::FieldPort: + return "port"; + case TField::FieldPath: + return "path"; + case TField::FieldQuery: + return "query"; + case TField::FieldFrag: + return "fragment"; + default: + return "Field[Unknown]"; + } } - const char* SchemeKindToString(const TScheme::EKind& t) { - const TSchemeInfo& info = TSchemeInfo::Get(t); + const char* SchemeKindToString(const TScheme::EKind& t) { + const TSchemeInfo& info = TSchemeInfo::Get(t); if (!info.Str.empty()) - return info.Str.data(); - return TScheme::SchemeEmpty == t ? "empty" : "unknown"; + return info.Str.data(); + return TScheme::SchemeEmpty == t ? "empty" : "unknown"; } } diff --git a/library/cpp/uri/common.h b/library/cpp/uri/common.h index bd1aca3318..8025357763 100644 --- a/library/cpp/uri/common.h +++ b/library/cpp/uri/common.h @@ -2,237 +2,237 @@ #include <util/stream/output.h> #include <util/system/compat.h> -#include <util/generic/strbuf.h> - +#include <util/generic/strbuf.h> + namespace NUri { - namespace NEncode { - class TEncoder; - class TEncodeMapperBase; - struct TCharFlags; - } - - namespace NParse { - class TRange; - } - - class TParser; - - struct TField { -#define FIELD_NAME(f) Field##f -#define FIELD_FLAG(f) Flag##f = 1U << FIELD_NAME(f) - - enum EField { - FIELD_NAME(Scheme), - FIELD_NAME(User), - FIELD_NAME(Pass), - FIELD_NAME(Host), - FIELD_NAME(Port), - FIELD_NAME(Path), - FIELD_NAME(Query), - FIELD_NAME(Frag), - - // add fields above - FieldUrlMAX, - // reset count so actual field offsets are not interrupted - FieldUrlLast = FieldUrlMAX - 1, - // add extra fields below - - FIELD_NAME(HostAscii), - - // add extra fields above - FieldAllMAX, - // add aliases below - - FieldUsername = FieldUser, - FieldPassword = FieldPass, - FieldFragment = FieldFrag, - }; - - enum EFlags { - FIELD_FLAG(Scheme), - FIELD_FLAG(User), - FIELD_FLAG(Pass), - FIELD_FLAG(Host), - FIELD_FLAG(Port), - FIELD_FLAG(Path), - FIELD_FLAG(Query), - FIELD_FLAG(Frag), - FIELD_FLAG(UrlMAX), - FIELD_FLAG(HostAscii), - FIELD_FLAG(AllMAX), - - FlagHostPort = FlagHost | FlagPort, - FlagAuth = FlagUser | FlagPass, - FlagFragment = FlagFrag, - FlagAction = FlagScheme | FlagHostPort | FlagPath, - FlagNoFrag = FlagAction | FlagQuery, - FlagUrlFields = FlagUrlMAX - 1, - FlagAll = FlagUrlFields, // obsolete, for backwards compatibility - FlagAllFields = FlagAllMAX - 1 - }; + namespace NEncode { + class TEncoder; + class TEncodeMapperBase; + struct TCharFlags; + } + + namespace NParse { + class TRange; + } + + class TParser; + + struct TField { +#define FIELD_NAME(f) Field##f +#define FIELD_FLAG(f) Flag##f = 1U << FIELD_NAME(f) + + enum EField { + FIELD_NAME(Scheme), + FIELD_NAME(User), + FIELD_NAME(Pass), + FIELD_NAME(Host), + FIELD_NAME(Port), + FIELD_NAME(Path), + FIELD_NAME(Query), + FIELD_NAME(Frag), + + // add fields above + FieldUrlMAX, + // reset count so actual field offsets are not interrupted + FieldUrlLast = FieldUrlMAX - 1, + // add extra fields below + + FIELD_NAME(HostAscii), + + // add extra fields above + FieldAllMAX, + // add aliases below + + FieldUsername = FieldUser, + FieldPassword = FieldPass, + FieldFragment = FieldFrag, + }; + + enum EFlags { + FIELD_FLAG(Scheme), + FIELD_FLAG(User), + FIELD_FLAG(Pass), + FIELD_FLAG(Host), + FIELD_FLAG(Port), + FIELD_FLAG(Path), + FIELD_FLAG(Query), + FIELD_FLAG(Frag), + FIELD_FLAG(UrlMAX), + FIELD_FLAG(HostAscii), + FIELD_FLAG(AllMAX), + + FlagHostPort = FlagHost | FlagPort, + FlagAuth = FlagUser | FlagPass, + FlagFragment = FlagFrag, + FlagAction = FlagScheme | FlagHostPort | FlagPath, + FlagNoFrag = FlagAction | FlagQuery, + FlagUrlFields = FlagUrlMAX - 1, + FlagAll = FlagUrlFields, // obsolete, for backwards compatibility + FlagAllFields = FlagAllMAX - 1 + }; #undef FIELD_NAME #undef FIELD_FLAG - }; - - struct TState { - enum EParsed { - ParsedOK = 0, - ParsedEmpty = 1, - ParsedOpaque = 2, - ParsedRootless = ParsedOpaque, - ParsedBadFormat, // must follow all non-error states immediately - ParsedBadPath, - ParsedTooLong, - ParsedBadPort, - ParsedBadAuth, - ParsedBadScheme, - ParsedBadHost, - - // add before this line - ParsedMAX - }; }; - struct TScheme { - // don't forget to define a SchemeRegistry entry - enum EKind { - SchemeEmpty - // add schemes below this line - , - SchemeHTTP, - SchemeHTTPS, - SchemeFTP, + struct TState { + enum EParsed { + ParsedOK = 0, + ParsedEmpty = 1, + ParsedOpaque = 2, + ParsedRootless = ParsedOpaque, + ParsedBadFormat, // must follow all non-error states immediately + ParsedBadPath, + ParsedTooLong, + ParsedBadPort, + ParsedBadAuth, + ParsedBadScheme, + ParsedBadHost, + + // add before this line + ParsedMAX + }; + }; + + struct TScheme { + // don't forget to define a SchemeRegistry entry + enum EKind { + SchemeEmpty + // add schemes below this line + , + SchemeHTTP, + SchemeHTTPS, + SchemeFTP, SchemeFILE, SchemeWS, SchemeWSS - // add schemes above this line - , - SchemeUnknown - }; + // add schemes above this line + , + SchemeUnknown + }; }; - class TFeature { - friend class NEncode::TEncoder; - friend class NEncode::TEncodeMapperBase; - friend struct NEncode::TCharFlags; - friend class TParser; - friend class NParse::TRange; + class TFeature { + friend class NEncode::TEncoder; + friend class NEncode::TEncodeMapperBase; + friend struct NEncode::TCharFlags; + friend class TParser; + friend class NParse::TRange; -#define FEATURE_NAME(f) _BitFeature##f -#define FEATURE_FLAG_NAME(f) Feature##f +#define FEATURE_NAME(f) _BitFeature##f +#define FEATURE_FLAG_NAME(f) Feature##f #define FEATURE_FLAG(f) FEATURE_FLAG_NAME(f) = 1UL << FEATURE_NAME(f) - protected: - enum EBit { - //============================== - // Cases interpreted as errors: - //============================== + protected: + enum EBit { + //============================== + // Cases interpreted as errors: + //============================== - // allows authorization user/password in URL - FEATURE_NAME(AuthSupported), + // allows authorization user/password in URL + FEATURE_NAME(AuthSupported), - // allows all known schemes in URL - FEATURE_NAME(SchemeKnown), + // allows all known schemes in URL + FEATURE_NAME(SchemeKnown), - // allows all schemes, not only known - FEATURE_NAME(SchemeFlexible), + // allows all schemes, not only known + FEATURE_NAME(SchemeFlexible), - // allow opaque (RFC 2396) or rootless (RFC 3986) urls - FEATURE_NAME(AllowRootless), + // allow opaque (RFC 2396) or rootless (RFC 3986) urls + FEATURE_NAME(AllowRootless), - //============================== - // Cases interpreted for processing (if required): - // (effects on result of Parse method) - //============================== + //============================== + // Cases interpreted for processing (if required): + // (effects on result of Parse method) + //============================== - // path needs normalization - // (simplification of directory tree: /../, /./, etc. - FEATURE_NAME(PathOperation), + // path needs normalization + // (simplification of directory tree: /../, /./, etc. + FEATURE_NAME(PathOperation), - // don't force empty path to "/" - FEATURE_NAME(AllowEmptyPath), + // don't force empty path to "/" + FEATURE_NAME(AllowEmptyPath), - // in scheme and host segments: - // change upper case letters onto lower case ones - FEATURE_NAME(ToLower), + // in scheme and host segments: + // change upper case letters onto lower case ones + FEATURE_NAME(ToLower), - // decode unreserved symbols - FEATURE_NAME(DecodeUnreserved), + // decode unreserved symbols + FEATURE_NAME(DecodeUnreserved), - // legacy: decode standard symbols which may be safe for some fields - FEATURE_NAME(DecodeStandardExtra), + // legacy: decode standard symbols which may be safe for some fields + FEATURE_NAME(DecodeStandardExtra), - // decode symbols allowed (not necessarily safe to decode) only for a given field - // (do not use directly, instead use FeatureDecodeSafe mask below) - FEATURE_NAME(DecodeFieldAllowed), + // decode symbols allowed (not necessarily safe to decode) only for a given field + // (do not use directly, instead use FeatureDecodeSafe mask below) + FEATURE_NAME(DecodeFieldAllowed), - // handling of spaces - FEATURE_NAME(EncodeSpace), + // handling of spaces + FEATURE_NAME(EncodeSpace), - // in query segment: change escaped space to '+' - FEATURE_NAME(EncodeSpaceAsPlus), + // in query segment: change escaped space to '+' + FEATURE_NAME(EncodeSpaceAsPlus), - // escape all string 'markup' symbols - FEATURE_NAME(EncodeForSQL), + // escape all string 'markup' symbols + FEATURE_NAME(EncodeForSQL), - // encoding of extended ascii symbols (8-bit) - FEATURE_NAME(EncodeExtendedASCII), + // encoding of extended ascii symbols (8-bit) + FEATURE_NAME(EncodeExtendedASCII), - // decoding of extended ascii symbols (8-bit) - FEATURE_NAME(DecodeExtendedASCII), + // decoding of extended ascii symbols (8-bit) + FEATURE_NAME(DecodeExtendedASCII), - // encoding of extended delimiter set - FEATURE_NAME(EncodeExtendedDelim), + // encoding of extended delimiter set + FEATURE_NAME(EncodeExtendedDelim), - // decoding of extended delimiter set - FEATURE_NAME(DecodeExtendedDelim), + // decoding of extended delimiter set + FEATURE_NAME(DecodeExtendedDelim), - // control characters [0x00 .. 0x20) - FEATURE_NAME(EncodeCntrl), + // control characters [0x00 .. 0x20) + FEATURE_NAME(EncodeCntrl), - // raw percent character - FEATURE_NAME(EncodePercent), + // raw percent character + FEATURE_NAME(EncodePercent), - // hash fragments - // https://developers.google.com/webmasters/ajax-crawling/docs/specification - // move and encode #! fragments to the query - FEATURE_NAME(HashBangToEscapedFragment), - // move and decode _escaped_fragment_ to the fragment - FEATURE_NAME(EscapedToHashBangFragment), + // hash fragments + // https://developers.google.com/webmasters/ajax-crawling/docs/specification + // move and encode #! fragments to the query + FEATURE_NAME(HashBangToEscapedFragment), + // move and decode _escaped_fragment_ to the fragment + FEATURE_NAME(EscapedToHashBangFragment), - // reject absolute paths started by "/../" - FEATURE_NAME(PathDenyRootParent), + // reject absolute paths started by "/../" + FEATURE_NAME(PathDenyRootParent), - // paths started by "/../" - ignore head - FEATURE_NAME(PathStripRootParent), + // paths started by "/../" - ignore head + FEATURE_NAME(PathStripRootParent), - // tries to fix errors (in particular, in fragment) - FEATURE_NAME(TryToFix), + // tries to fix errors (in particular, in fragment) + FEATURE_NAME(TryToFix), - // check host for DNS compliance - FEATURE_NAME(CheckHost), + // check host for DNS compliance + FEATURE_NAME(CheckHost), - // allow IDN hosts - // host is converted to punycode and stored in FieldHostAscii - // @note host contains characters in the charset of the document - // and percent-encoded characters in UTF-8 (RFC 3986, 3.2.2) - // @note if host contains no extended-ASCII characters and after - // percent-decoding cannot be converted from UTF-8 to UCS-4, - // try to recode from the document charset (if not UTF-8) - FEATURE_NAME(AllowHostIDN), + // allow IDN hosts + // host is converted to punycode and stored in FieldHostAscii + // @note host contains characters in the charset of the document + // and percent-encoded characters in UTF-8 (RFC 3986, 3.2.2) + // @note if host contains no extended-ASCII characters and after + // percent-decoding cannot be converted from UTF-8 to UCS-4, + // try to recode from the document charset (if not UTF-8) + FEATURE_NAME(AllowHostIDN), - // forces AllowHostIDN, but host is replaced with punycode - // forces CheckHost since this replacement is irreversible - FEATURE_NAME(ConvertHostIDN), + // forces AllowHostIDN, but host is replaced with punycode + // forces CheckHost since this replacement is irreversible + FEATURE_NAME(ConvertHostIDN), - // robot interpreted network paths as BadFormat urls - FEATURE_NAME(DenyNetworkPath), + // robot interpreted network paths as BadFormat urls + FEATURE_NAME(DenyNetworkPath), - // robot interprets URLs without a host as BadFormat - FEATURE_NAME(RemoteOnly), + // robot interprets URLs without a host as BadFormat + FEATURE_NAME(RemoteOnly), - /* non-RFC use case: + /* non-RFC use case: * 1. do not allow relative-path-only URIs when they can conflict with * "host/path" (that is, only "./path" or "../path" are allowed); * 2. if neither scheme nor userinfo are present but port is, it must @@ -243,269 +243,269 @@ namespace NUri { * "scheme:pa@th" over "user:pass@host", and even "host:port" when * host contains only scheme-legal characters. */ - FEATURE_NAME(NoRelPath), - - // standard prefers that all hex escapes were using uppercase A-F - FEATURE_NAME(UpperEncoded), - - // internal usage: decode all encoded symbols - FEATURE_NAME(DecodeANY), - - // add before this line - _FeatureMAX - }; - - protected: - enum EPrivate : ui32 { - FEATURE_FLAG(DecodeANY), - FEATURE_FLAG(DecodeFieldAllowed), - FEATURE_FLAG(DecodeStandardExtra), - }; - - public: - enum EPublic : ui32 { - FeatureMAX = _FeatureMAX, - FEATURE_FLAG(AuthSupported), - FEATURE_FLAG(SchemeKnown), - FEATURE_FLAG(SchemeFlexible), - FEATURE_FLAG(AllowRootless), - FEATURE_FLAG_NAME(AllowOpaque) = FEATURE_FLAG_NAME(AllowRootless), - FEATURE_FLAG(PathOperation), - FEATURE_FLAG(AllowEmptyPath), - FEATURE_FLAG(ToLower), - FEATURE_FLAG(DecodeUnreserved), - FEATURE_FLAG(EncodeSpace), - FEATURE_FLAG(EncodeSpaceAsPlus), - FEATURE_FLAG(EncodeForSQL), - FEATURE_FLAG(EncodeExtendedASCII), - FEATURE_FLAG(DecodeExtendedASCII), - FEATURE_FLAG(EncodeExtendedDelim), - FEATURE_FLAG(DecodeExtendedDelim), - FEATURE_FLAG(EncodeCntrl), - FEATURE_FLAG(EncodePercent), - FEATURE_FLAG(HashBangToEscapedFragment), - FEATURE_FLAG(EscapedToHashBangFragment), - FEATURE_FLAG(PathDenyRootParent), - FEATURE_FLAG(PathStripRootParent), - FEATURE_FLAG(TryToFix), - FEATURE_FLAG(CheckHost), - FEATURE_FLAG(AllowHostIDN), - FEATURE_FLAG(ConvertHostIDN), - FEATURE_FLAG(DenyNetworkPath), - FEATURE_FLAG(RemoteOnly), - FEATURE_FLAG(NoRelPath), - FEATURE_FLAG_NAME(HierURI) = FEATURE_FLAG_NAME(NoRelPath), - FEATURE_FLAG(UpperEncoded), - }; + FEATURE_NAME(NoRelPath), + + // standard prefers that all hex escapes were using uppercase A-F + FEATURE_NAME(UpperEncoded), + + // internal usage: decode all encoded symbols + FEATURE_NAME(DecodeANY), + + // add before this line + _FeatureMAX + }; + + protected: + enum EPrivate : ui32 { + FEATURE_FLAG(DecodeANY), + FEATURE_FLAG(DecodeFieldAllowed), + FEATURE_FLAG(DecodeStandardExtra), + }; + + public: + enum EPublic : ui32 { + FeatureMAX = _FeatureMAX, + FEATURE_FLAG(AuthSupported), + FEATURE_FLAG(SchemeKnown), + FEATURE_FLAG(SchemeFlexible), + FEATURE_FLAG(AllowRootless), + FEATURE_FLAG_NAME(AllowOpaque) = FEATURE_FLAG_NAME(AllowRootless), + FEATURE_FLAG(PathOperation), + FEATURE_FLAG(AllowEmptyPath), + FEATURE_FLAG(ToLower), + FEATURE_FLAG(DecodeUnreserved), + FEATURE_FLAG(EncodeSpace), + FEATURE_FLAG(EncodeSpaceAsPlus), + FEATURE_FLAG(EncodeForSQL), + FEATURE_FLAG(EncodeExtendedASCII), + FEATURE_FLAG(DecodeExtendedASCII), + FEATURE_FLAG(EncodeExtendedDelim), + FEATURE_FLAG(DecodeExtendedDelim), + FEATURE_FLAG(EncodeCntrl), + FEATURE_FLAG(EncodePercent), + FEATURE_FLAG(HashBangToEscapedFragment), + FEATURE_FLAG(EscapedToHashBangFragment), + FEATURE_FLAG(PathDenyRootParent), + FEATURE_FLAG(PathStripRootParent), + FEATURE_FLAG(TryToFix), + FEATURE_FLAG(CheckHost), + FEATURE_FLAG(AllowHostIDN), + FEATURE_FLAG(ConvertHostIDN), + FEATURE_FLAG(DenyNetworkPath), + FEATURE_FLAG(RemoteOnly), + FEATURE_FLAG(NoRelPath), + FEATURE_FLAG_NAME(HierURI) = FEATURE_FLAG_NAME(NoRelPath), + FEATURE_FLAG(UpperEncoded), + }; #undef FEATURE_NAME #undef FEATURE_FLAG - public: - //============================== - enum ESets { - // these are guaranteed and will change buffer size + public: + //============================== + enum ESets { + // these are guaranteed and will change buffer size - FeatureDecodeStandard = 0 | FeatureDecodeUnreserved | FeatureDecodeStandardExtra, + FeatureDecodeStandard = 0 | FeatureDecodeUnreserved | FeatureDecodeStandardExtra, - FeaturesDecodeExtended = 0 | FeatureDecodeExtendedASCII | FeatureDecodeExtendedDelim, + FeaturesDecodeExtended = 0 | FeatureDecodeExtendedASCII | FeatureDecodeExtendedDelim, - FeaturesDecode = 0 | FeatureDecodeUnreserved | FeatureDecodeStandard | FeaturesDecodeExtended, + FeaturesDecode = 0 | FeatureDecodeUnreserved | FeatureDecodeStandard | FeaturesDecodeExtended, - FeaturesEncodeExtended = 0 | FeatureEncodeExtendedASCII | FeatureEncodeExtendedDelim, + FeaturesEncodeExtended = 0 | FeatureEncodeExtendedASCII | FeatureEncodeExtendedDelim, - FeaturesEncode = 0 | FeatureEncodeForSQL | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent | FeaturesEncodeExtended, + FeaturesEncode = 0 | FeatureEncodeForSQL | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent | FeaturesEncodeExtended, - // these are not guaranteed to apply to a given field + // these are not guaranteed to apply to a given field - FeatureDecodeAllowed = 0 | FeatureDecodeUnreserved | FeatureDecodeFieldAllowed, + FeatureDecodeAllowed = 0 | FeatureDecodeUnreserved | FeatureDecodeFieldAllowed, - FeaturesMaybeDecode = 0 | FeaturesDecode | FeatureDecodeAllowed, + FeaturesMaybeDecode = 0 | FeaturesDecode | FeatureDecodeAllowed, - FeaturesMaybeEncode = 0 | FeaturesEncode, + FeaturesMaybeEncode = 0 | FeaturesEncode, - FeaturesEncodeDecode = 0 | FeaturesMaybeEncode | FeaturesMaybeDecode, + FeaturesEncodeDecode = 0 | FeaturesMaybeEncode | FeaturesMaybeDecode, - FeaturesAllEncoder = 0 | FeaturesEncodeDecode | FeatureDecodeANY | FeatureToLower | FeatureUpperEncoded | FeatureEncodeSpaceAsPlus, + FeaturesAllEncoder = 0 | FeaturesEncodeDecode | FeatureDecodeANY | FeatureToLower | FeatureUpperEncoded | FeatureEncodeSpaceAsPlus, - //============================== - FeaturesNormalizeSet = 0 | FeaturePathOperation | FeatureToLower | FeatureDecodeAllowed | FeatureEncodeSpaceAsPlus | FeatureEncodeForSQL | FeaturePathStripRootParent | FeatureTryToFix | FeatureUpperEncoded, + //============================== + FeaturesNormalizeSet = 0 | FeaturePathOperation | FeatureToLower | FeatureDecodeAllowed | FeatureEncodeSpaceAsPlus | FeatureEncodeForSQL | FeaturePathStripRootParent | FeatureTryToFix | FeatureUpperEncoded, - FeaturesDefault = 0 // it reproduces old parsedURL - | FeaturePathOperation | FeaturePathDenyRootParent | FeatureCheckHost, + FeaturesDefault = 0 // it reproduces old parsedURL + | FeaturePathOperation | FeaturePathDenyRootParent | FeatureCheckHost, - // essentially allows all valid RFC urls and keeps them as-is - FeaturesBare = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureAllowEmptyPath, + // essentially allows all valid RFC urls and keeps them as-is + FeaturesBare = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureAllowEmptyPath, - FeaturesAll = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureCheckHost | FeaturesNormalizeSet, + FeaturesAll = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureCheckHost | FeaturesNormalizeSet, - // Deprecated, use FeaturesRecommended - FeaturesRobotOld = 0 - // http://tools.ietf.org/html/rfc3986#section-6.2.2 - | FeatureToLower // 6.2.2.1 - | FeatureUpperEncoded // 6.2.2.1 - | FeatureDecodeUnreserved // 6.2.2.2 - | FeaturePathOperation // 6.2.2.3 - | FeaturePathDenyRootParent | FeatureSchemeKnown | FeatureConvertHostIDN | FeatureRemoteOnly | FeatureHashBangToEscapedFragment | FeatureCheckHost, + // Deprecated, use FeaturesRecommended + FeaturesRobotOld = 0 + // http://tools.ietf.org/html/rfc3986#section-6.2.2 + | FeatureToLower // 6.2.2.1 + | FeatureUpperEncoded // 6.2.2.1 + | FeatureDecodeUnreserved // 6.2.2.2 + | FeaturePathOperation // 6.2.2.3 + | FeaturePathDenyRootParent | FeatureSchemeKnown | FeatureConvertHostIDN | FeatureRemoteOnly | FeatureHashBangToEscapedFragment | FeatureCheckHost, - // these are mutually exclusive - FeaturesPath = 0 | FeaturePathDenyRootParent | FeaturePathStripRootParent, + // these are mutually exclusive + FeaturesPath = 0 | FeaturePathDenyRootParent | FeaturePathStripRootParent, - FeaturesEscapedFragment = 0 | FeatureEscapedToHashBangFragment | FeatureHashBangToEscapedFragment, + FeaturesEscapedFragment = 0 | FeatureEscapedToHashBangFragment | FeatureHashBangToEscapedFragment, - FeaturesCheckSpecialChar = 0 | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent, + FeaturesCheckSpecialChar = 0 | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent, - FeaturesEncodePChar = 0 | FeatureUpperEncoded | FeaturesEncodeDecode | FeaturesCheckSpecialChar, + FeaturesEncodePChar = 0 | FeatureUpperEncoded | FeaturesEncodeDecode | FeaturesCheckSpecialChar, - // http://wiki.yandex-team.ru/robot/newDesign/dups/normolization - FeaturesRecommended = 0 | FeatureSchemeKnown | FeatureRemoteOnly | FeatureToLower | FeatureCheckHost | FeatureConvertHostIDN | FeatureHashBangToEscapedFragment | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodeExtendedASCII | FeatureUpperEncoded | FeatureDecodeUnreserved | FeaturePathOperation | FeaturePathStripRootParent, + // http://wiki.yandex-team.ru/robot/newDesign/dups/normolization + FeaturesRecommended = 0 | FeatureSchemeKnown | FeatureRemoteOnly | FeatureToLower | FeatureCheckHost | FeatureConvertHostIDN | FeatureHashBangToEscapedFragment | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodeExtendedASCII | FeatureUpperEncoded | FeatureDecodeUnreserved | FeaturePathOperation | FeaturePathStripRootParent, - FeaturesRobot = FeaturesRecommended - }; + FeaturesRobot = FeaturesRecommended + }; }; - static inline int strnicmp(const char* lt, const char* rt, size_t len) { - return lt == rt ? 0 : ::strnicmp(lt, rt, len); - } + static inline int strnicmp(const char* lt, const char* rt, size_t len) { + return lt == rt ? 0 : ::strnicmp(lt, rt, len); + } - static inline int CompareNoCasePrefix(const TStringBuf& lt, const TStringBuf& rt) { - return strnicmp(lt.data(), rt.data(), rt.length()); - } + static inline int CompareNoCasePrefix(const TStringBuf& lt, const TStringBuf& rt) { + return strnicmp(lt.data(), rt.data(), rt.length()); + } - static inline bool EqualNoCase(const TStringBuf& lt, const TStringBuf& rt) { - return lt.length() == rt.length() && 0 == CompareNoCasePrefix(lt, rt); + static inline bool EqualNoCase(const TStringBuf& lt, const TStringBuf& rt) { + return lt.length() == rt.length() && 0 == CompareNoCasePrefix(lt, rt); } - static inline int CompareNoCase(const TStringBuf& lt, const TStringBuf& rt) { - if (lt.length() == rt.length()) - return CompareNoCasePrefix(lt, rt); - return lt.length() < rt.length() ? -1 : 1; + static inline int CompareNoCase(const TStringBuf& lt, const TStringBuf& rt) { + if (lt.length() == rt.length()) + return CompareNoCasePrefix(lt, rt); + return lt.length() < rt.length() ? -1 : 1; } - class TSchemeInfo { - public: - const TScheme::EKind Kind; - const ui16 Port; - const TStringBuf Str; - const ui32 FldReq; - TSchemeInfo(TScheme::EKind kind, TStringBuf str, ui32 fldReq = 0, ui16 port = 0) - : Kind(kind) - , Port(port) - , Str(str) - , FldReq(fldReq) - { - } - bool Matches(const TStringBuf& scheme) const { - return EqualNoCase(scheme, Str); - } - - public: - static const TSchemeInfo& Get(const TStringBuf& scheme); - static const TSchemeInfo& Get(TScheme::EKind scheme) { - return Registry[scheme]; - } - static TScheme::EKind GetKind(const TStringBuf& scheme) { - return Get(scheme).Kind; - } - static TStringBuf GetCanon(TScheme::EKind scheme) { - return Get(scheme).Str; - } - static ui16 GetDefaultPort(TScheme::EKind scheme) { - return Get(scheme).Port; - } - - private: - static const TSchemeInfo Registry[]; - }; - - struct TParseFlags { - const ui64 Allow; - const ui64 Extra; - TParseFlags(ui64 allow = 0, ui64 extra = 0) - : Allow(allow) - , Extra(extra) - { - } - ui64 operator&(const TParseFlags& flags) const { - return (Allow & flags.Allow) | (Extra & flags.Extra); - } - ui64 operator&(ui64 flags) const { - return (Allow & flags); - } - TParseFlags operator|(const TParseFlags& flags) const { - return TParseFlags(Allow | flags.Allow, Extra | flags.Extra); - } - TParseFlags Exclude(ui64 flags) const { - return TParseFlags(Allow & ~flags, Extra & ~flags); - } - }; - -#define FEATURE_NAME(f) _BitFeature##f -#define FEATURE_FLAG_NAME(f) Feature##f + class TSchemeInfo { + public: + const TScheme::EKind Kind; + const ui16 Port; + const TStringBuf Str; + const ui32 FldReq; + TSchemeInfo(TScheme::EKind kind, TStringBuf str, ui32 fldReq = 0, ui16 port = 0) + : Kind(kind) + , Port(port) + , Str(str) + , FldReq(fldReq) + { + } + bool Matches(const TStringBuf& scheme) const { + return EqualNoCase(scheme, Str); + } + + public: + static const TSchemeInfo& Get(const TStringBuf& scheme); + static const TSchemeInfo& Get(TScheme::EKind scheme) { + return Registry[scheme]; + } + static TScheme::EKind GetKind(const TStringBuf& scheme) { + return Get(scheme).Kind; + } + static TStringBuf GetCanon(TScheme::EKind scheme) { + return Get(scheme).Str; + } + static ui16 GetDefaultPort(TScheme::EKind scheme) { + return Get(scheme).Port; + } + + private: + static const TSchemeInfo Registry[]; + }; + + struct TParseFlags { + const ui64 Allow; + const ui64 Extra; + TParseFlags(ui64 allow = 0, ui64 extra = 0) + : Allow(allow) + , Extra(extra) + { + } + ui64 operator&(const TParseFlags& flags) const { + return (Allow & flags.Allow) | (Extra & flags.Extra); + } + ui64 operator&(ui64 flags) const { + return (Allow & flags); + } + TParseFlags operator|(const TParseFlags& flags) const { + return TParseFlags(Allow | flags.Allow, Extra | flags.Extra); + } + TParseFlags Exclude(ui64 flags) const { + return TParseFlags(Allow & ~flags, Extra & ~flags); + } + }; + +#define FEATURE_NAME(f) _BitFeature##f +#define FEATURE_FLAG_NAME(f) Feature##f #define FEATURE_FLAG(f) FEATURE_FLAG_NAME(f) = 1UL << FEATURE_NAME(f) - struct TQueryArg { - TStringBuf Name; - TStringBuf Value; - - private: - enum EBit { - FEATURE_NAME(Filter), - FEATURE_NAME(SortByName), - FEATURE_NAME(RemoveEmptyQuery), - FEATURE_NAME(RewriteDirty), - _FeatureMAX - }; - - public: - enum EPublic : ui32 { - FeatureMAX = _FeatureMAX, - FEATURE_FLAG(Filter), - FEATURE_FLAG(SortByName), - FEATURE_FLAG(RemoveEmptyQuery), - FEATURE_FLAG(RewriteDirty), - }; - - enum EProcessed { - // OK and clean. - ProcessedOK = 0, - - // OK, but query stored in internal buffer and TUri::Rewrite() is required. - ProcessedDirty = 1, - - ProcessedMalformed = 2, - ProcessedTooMany = 3, - }; - }; - - typedef bool (*TQueryArgFilter)(const TQueryArg& arg, void* filterData); + struct TQueryArg { + TStringBuf Name; + TStringBuf Value; + + private: + enum EBit { + FEATURE_NAME(Filter), + FEATURE_NAME(SortByName), + FEATURE_NAME(RemoveEmptyQuery), + FEATURE_NAME(RewriteDirty), + _FeatureMAX + }; + + public: + enum EPublic : ui32 { + FeatureMAX = _FeatureMAX, + FEATURE_FLAG(Filter), + FEATURE_FLAG(SortByName), + FEATURE_FLAG(RemoveEmptyQuery), + FEATURE_FLAG(RewriteDirty), + }; + + enum EProcessed { + // OK and clean. + ProcessedOK = 0, + + // OK, but query stored in internal buffer and TUri::Rewrite() is required. + ProcessedDirty = 1, + + ProcessedMalformed = 2, + ProcessedTooMany = 3, + }; + }; + + typedef bool (*TQueryArgFilter)(const TQueryArg& arg, void* filterData); #undef FEATURE_NAME #undef FEATURE_FLAG_NAME #undef FEATURE_FLAG - const char* FieldToString(const TField::EField& t); - const char* ParsedStateToString(const TState::EParsed& t); - const char* SchemeKindToString(const TScheme::EKind& t); + const char* FieldToString(const TField::EField& t); + const char* ParsedStateToString(const TState::EParsed& t); + const char* SchemeKindToString(const TScheme::EKind& t); } -Y_DECLARE_OUT_SPEC(inline, NUri::TField::EField, out, t) { +Y_DECLARE_OUT_SPEC(inline, NUri::TField::EField, out, t) { out << NUri::FieldToString(t); } -Y_DECLARE_OUT_SPEC(inline, NUri::TScheme::EKind, out, t) { +Y_DECLARE_OUT_SPEC(inline, NUri::TScheme::EKind, out, t) { out << NUri::SchemeKindToString(t); } -Y_DECLARE_OUT_SPEC(inline, NUri::TState::EParsed, out, t) { +Y_DECLARE_OUT_SPEC(inline, NUri::TState::EParsed, out, t) { out << NUri::ParsedStateToString(t); } -static inline ui16 DefaultPort(NUri::TScheme::EKind scheme) { +static inline ui16 DefaultPort(NUri::TScheme::EKind scheme) { return NUri::TSchemeInfo::GetDefaultPort(scheme); } -static inline NUri::TScheme::EKind SchemeKind(const TStringBuf& scheme) { +static inline NUri::TScheme::EKind SchemeKind(const TStringBuf& scheme) { return NUri::TSchemeInfo::GetKind(scheme); } diff --git a/library/cpp/uri/encode.cpp b/library/cpp/uri/encode.cpp index 34790c2d21..584fb1bac9 100644 --- a/library/cpp/uri/encode.cpp +++ b/library/cpp/uri/encode.cpp @@ -1,9 +1,9 @@ #include "encode.h" -#include <util/generic/singleton.h> +#include <util/generic/singleton.h> namespace NUri { - namespace NEncode { + namespace NEncode { // http://tools.ietf.org/html/rfc3986#section-2.2 #define GENDELIMS0 ":/?#[]@" #define SUBDELIMS0 "!$&'()*+,;=" @@ -24,197 +24,197 @@ namespace NUri { // rest allowed in qry, frag #define SUBDELIMS2 "!$'()*," - const TEncoder::TGrammar& TEncoder::Grammar() { - return *Singleton<TEncoder::TGrammar>(); - } - - // initialize the grammar map - TEncoder::TGrammar::TGrammar() { - // first set up unreserved characters safe in any field - const ui64 featUnres = TFeature::FeatureDecodeUnreserved; - AddRng('0', '9', ECFDigit, featUnres); - AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); - AddRng('a', 'z', ECFLower, featUnres); - Add(UNRESERVED, ECFUnres, featUnres); - - // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: - // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') - Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); - - // now field-specific subsets of reserved characters (gen-delims + sub-delims) - const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; - - Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); - Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); - - Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); - Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); - - // control chars - AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); - Add(0x7f, TFeature::FeatureEncodeCntrl); - - // '%' starts a percent-encoded sequence - Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); - - // extended ASCII - AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); - - // extended delims - Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); - - // add characters with other features - Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); - Add("'\"\\", TFeature::FeatureEncodeForSQL); - - GetMutable(':').EncodeFld |= TField::FlagUser; - GetMutable('?').EncodeFld |= TField::FlagPath; - GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; - GetMutable('&').EncodeFld |= TField::FlagQuery; - GetMutable('+').EncodeFld |= TField::FlagQuery; - } - - // should we decode an encoded character - bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const { - const ui64 myflags = flags & FeatFlags; - if (myflags & TFeature::FeaturesEncode) - return false; - if (myflags & TFeature::FeaturesDecode) - return true; - return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); - } - - const int dD = 'a' - 'A'; - - int TEncodeMapper::EncodeSym(unsigned char& ch) const { - const TCharFlags& chflags = TEncoder::GetFlags(ch); - const ui64 flags = Flags & chflags.FeatFlags; - - if (flags & TFeature::FeatureToLower) - ch += dD; - - if (Q_DecodeAny) - return -1; - - if (flags & TFeature::FeaturesEncode) - return 1; - - if (' ' == ch) { - if (Q_EncodeSpcAsPlus) - ch = '+'; - return 0; - } - - return 0; - } - - int TEncodeMapper::EncodeHex(unsigned char& ch) const { - const TCharFlags& chflags = TEncoder::GetFlags(ch); - const ui64 flags = Flags & chflags.FeatFlags; - - if (flags & TFeature::FeatureToLower) - ch += dD; - - if (Q_DecodeAny) - return -1; - - if (chflags.IsDecode(FldMask, Flags)) - return 0; - - if (' ' == ch) { - if (!Q_EncodeSpcAsPlus) - return 1; - ch = '+'; - return 0; - } - + const TEncoder::TGrammar& TEncoder::Grammar() { + return *Singleton<TEncoder::TGrammar>(); + } + + // initialize the grammar map + TEncoder::TGrammar::TGrammar() { + // first set up unreserved characters safe in any field + const ui64 featUnres = TFeature::FeatureDecodeUnreserved; + AddRng('0', '9', ECFDigit, featUnres); + AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); + AddRng('a', 'z', ECFLower, featUnres); + Add(UNRESERVED, ECFUnres, featUnres); + + // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: + // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') + Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); + + // now field-specific subsets of reserved characters (gen-delims + sub-delims) + const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; + + Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); + Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); + + Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); + Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); + + // control chars + AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); + Add(0x7f, TFeature::FeatureEncodeCntrl); + + // '%' starts a percent-encoded sequence + Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); + + // extended ASCII + AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); + + // extended delims + Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); + + // add characters with other features + Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); + Add("'\"\\", TFeature::FeatureEncodeForSQL); + + GetMutable(':').EncodeFld |= TField::FlagUser; + GetMutable('?').EncodeFld |= TField::FlagPath; + GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; + GetMutable('&').EncodeFld |= TField::FlagQuery; + GetMutable('+').EncodeFld |= TField::FlagQuery; + } + + // should we decode an encoded character + bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const { + const ui64 myflags = flags & FeatFlags; + if (myflags & TFeature::FeaturesEncode) + return false; + if (myflags & TFeature::FeaturesDecode) + return true; + return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); + } + + const int dD = 'a' - 'A'; + + int TEncodeMapper::EncodeSym(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (flags & TFeature::FeaturesEncode) + return 1; + + if (' ' == ch) { + if (Q_EncodeSpcAsPlus) + ch = '+'; + return 0; + } + + return 0; + } + + int TEncodeMapper::EncodeHex(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (chflags.IsDecode(FldMask, Flags)) + return 0; + + if (' ' == ch) { + if (!Q_EncodeSpcAsPlus) + return 1; + ch = '+'; + return 0; + } + return 1; - } - - bool TEncodeToMapper::Encode(unsigned char ch) const { - if (Q_DecodeAny) - return false; - - const TCharFlags& chflags = TEncoder::GetFlags(ch); - if (FldMask & chflags.EncodeFld) - return true; - - const ui64 flags = Flags & chflags.FeatFlags; - return (flags & TFeature::FeaturesEncode); - } - - TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) - : Out(out) - , FldSrc(fldsrc) - , FldDst(flddst) - , OutFlags(0) - , HexValue(0) - { - } - - IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { - static const char sHexCodes[] = "0123456789ABCDEF"; - return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; - } - - IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { - for (size_t i = 0; i != val.length(); ++i) - Encode(out, val[i]); - return out; - } - - IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) { - for (size_t i = 0; i != val.length(); ++i) { - const char c = val[i]; - if (IsAlnum(c)) - out << c; - else - Encode(out, c); - } - return out; - } - - IOutputStream& TEncoder::EncodeField( - IOutputStream& out, const TStringBuf& val, TField::EField fld) { - const ui32 fldmask = ui32(1) << fld; - for (size_t i = 0; i != val.length(); ++i) { - const char ch = val[i]; - if (GetFlags(ch).IsAllowed(fldmask)) - out << ch; - else - Encode(out, ch); - } - return out; - } - - IOutputStream& TEncoder::EncodeField( - IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) { - const ui32 fldmask = ui32(1) << fld; - for (size_t i = 0; i != val.length(); ++i) { - const char ch = val[i]; - if (GetFlags(ch).IsDecode(fldmask, flags)) - out << ch; - else - Encode(out, ch); - } - return out; - } - - void TEncoder::Do(unsigned char ch, int res) { - OutFlags |= GetFlags(ch).FeatFlags; - - bool escapepct = false; - if (0 < res) // definitely encode - escapepct = FldDst.Enabled(); - else if (0 != res || !FldDst.Enabled() || !FldDst.Encode(ch)) { - Out << ch; - return; - } - - Out << '%'; - if (escapepct) - Out.Write("25", 2); // '%' - Hex(Out, ch); - } + } + + bool TEncodeToMapper::Encode(unsigned char ch) const { + if (Q_DecodeAny) + return false; + + const TCharFlags& chflags = TEncoder::GetFlags(ch); + if (FldMask & chflags.EncodeFld) + return true; + + const ui64 flags = Flags & chflags.FeatFlags; + return (flags & TFeature::FeaturesEncode); + } + + TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) + : Out(out) + , FldSrc(fldsrc) + , FldDst(flddst) + , OutFlags(0) + , HexValue(0) + { + } + + IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { + static const char sHexCodes[] = "0123456789ABCDEF"; + return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; + } + + IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) + Encode(out, val[i]); + return out; + } + + IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) { + const char c = val[i]; + if (IsAlnum(c)) + out << c; + else + Encode(out, c); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsAllowed(fldmask)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsDecode(fldmask, flags)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + void TEncoder::Do(unsigned char ch, int res) { + OutFlags |= GetFlags(ch).FeatFlags; + + bool escapepct = false; + if (0 < res) // definitely encode + escapepct = FldDst.Enabled(); + else if (0 != res || !FldDst.Enabled() || !FldDst.Encode(ch)) { + Out << ch; + return; + } + + Out << '%'; + if (escapepct) + Out.Write("25", 2); // '%' + Hex(Out, ch); + } } } diff --git a/library/cpp/uri/encode.h b/library/cpp/uri/encode.h index ed1f1309ce..a9ece15427 100644 --- a/library/cpp/uri/encode.h +++ b/library/cpp/uri/encode.h @@ -5,278 +5,278 @@ #include <util/stream/output.h> namespace NUri { - namespace NEncode { -#define CHAR_TYPE_NAME(f) _ECT##f -#define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f) - - enum ECharType { - CHAR_TYPE_NAME(Digit), - CHAR_TYPE_NAME(Lower), - CHAR_TYPE_NAME(Upper), - CHAR_TYPE_NAME(Unres), - CHAR_TYPE_NAME(Stdrd), - }; - - enum ECharFlag { - CHAR_TYPE_FLAG(Digit), - CHAR_TYPE_FLAG(Lower), - CHAR_TYPE_FLAG(Upper), - CHAR_TYPE_FLAG(Unres), - CHAR_TYPE_FLAG(Stdrd), - // compound group flags - ECGAlpha = ECFUpper | ECFLower, - ECGAlnum = ECGAlpha | ECFDigit, - ECGUnres = ECGAlnum | ECFUnres, - ECGStdrd = ECGUnres | ECFStdrd, - }; + namespace NEncode { +#define CHAR_TYPE_NAME(f) _ECT##f +#define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f) + + enum ECharType { + CHAR_TYPE_NAME(Digit), + CHAR_TYPE_NAME(Lower), + CHAR_TYPE_NAME(Upper), + CHAR_TYPE_NAME(Unres), + CHAR_TYPE_NAME(Stdrd), + }; + + enum ECharFlag { + CHAR_TYPE_FLAG(Digit), + CHAR_TYPE_FLAG(Lower), + CHAR_TYPE_FLAG(Upper), + CHAR_TYPE_FLAG(Unres), + CHAR_TYPE_FLAG(Stdrd), + // compound group flags + ECGAlpha = ECFUpper | ECFLower, + ECGAlnum = ECGAlpha | ECFDigit, + ECGUnres = ECGAlnum | ECFUnres, + ECGStdrd = ECGUnres | ECFStdrd, + }; #undef CHAR_TYPE_NAME #undef CHAR_TYPE_FLAG - struct TCharFlags { - ui32 TypeFlags; - ui64 FeatFlags; - ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed - ui32 EncodeFld; // encode if shouldn't be treated as delimiter - TCharFlags(ui64 feat = 0) - : TypeFlags(0) - , FeatFlags(feat) - , DecodeFld(0) - , EncodeFld(0) - { - } - TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) - : TypeFlags(type) - , FeatFlags(feat) - , DecodeFld(decmask) - , EncodeFld(encmask) - { - } - TCharFlags& Add(const TCharFlags& val) { - TypeFlags |= val.TypeFlags; - FeatFlags |= val.FeatFlags; - DecodeFld |= val.DecodeFld; - EncodeFld |= val.EncodeFld; - return *this; - } - bool IsAllowed(ui32 fldmask) const { - return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask); - } - // should we decode an encoded character - bool IsDecode(ui32 fldmask, ui64 flags) const; - }; - - class TEncodeMapperBase { - protected: - TEncodeMapperBase() - : Flags(0) - , FldMask(0) - , Q_DecodeAny(false) - { - } - TEncodeMapperBase(ui64 flags, TField::EField fld) - : Flags(flags) - , FldMask(1u << fld) - , Q_DecodeAny(flags & TFeature::FeatureDecodeANY) - { - } - - protected: - const ui64 Flags; - const ui32 FldMask; - const bool Q_DecodeAny; // this is a special option for username/password - }; - - // maps a sym or hex character and indicates whether it has to be encoded - class TEncodeMapper - : public TEncodeMapperBase { - public: - TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) - : TEncodeMapperBase(flags, fld) - , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus) - { - } - // negative=sym, positive=hex, zero=maybesym - int EncodeSym(unsigned char&) const; - int EncodeHex(unsigned char&) const; - - protected: - const bool Q_EncodeSpcAsPlus; - }; - - // indicates whether a character has to be encoded when copying to a field - class TEncodeToMapper - : public TEncodeMapperBase { - public: - TEncodeToMapper() - : TEncodeMapperBase() - { - } - TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) - : TEncodeMapperBase(flags, fld) - { - } - bool Enabled() const { - return 0 != FldMask; - } - bool Encode(unsigned char) const; - }; - - class TEncoder { - public: - TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper()); - - ui64 ReEncode(const TStringBuf& url); - ui64 ReEncode(const char* str, size_t len) { - return ReEncode(TStringBuf(str, len)); - } - - protected: - static bool IsType(unsigned char c, ui64 flags) { - return GetFlags(c).TypeFlags & flags; - } - - public: - static bool IsDigit(unsigned char c) { - return IsType(c, ECFDigit); - } - static bool IsUpper(unsigned char c) { - return IsType(c, ECFUpper); - } - static bool IsLower(unsigned char c) { - return IsType(c, ECFLower); - } - static bool IsAlpha(unsigned char c) { - return IsType(c, ECGAlpha); - } - static bool IsAlnum(unsigned char c) { - return IsType(c, ECGAlnum); - } - static bool IsUnres(unsigned char c) { - return IsType(c, ECGUnres); - } - static const TCharFlags& GetFlags(unsigned char c) { - return Grammar().Get(c); - } - - public: - // process an encoded string, decoding safe chars and encoding unsafe - static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) { - TEncoder(out, srcfld).ReEncode(val); - return out; - } - static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) { - TEncoder(out, srcfld, dstfld).ReEncode(val); - return out; - } - - // see also UrlUnescape() from string/quote.h - static IOutputStream& Decode( - IOutputStream& out, const TStringBuf& val, ui64 flags) { - return ReEncode(out, val, flags | TFeature::FeatureDecodeANY); - } - - public: - // process a raw string or char, encode as needed - static IOutputStream& Hex(IOutputStream& out, unsigned char val); - static IOutputStream& Encode(IOutputStream& out, unsigned char val) { - out << '%'; - return Hex(out, val); - } - static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val); - static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val); - - static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld); - static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags); - - static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) { - return EncodeField(out, val, TField::FieldAllMAX); - } - - static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) { - return EncodeField(out, val, TField::FieldAllMAX, flags); - } - - public: - class TGrammar { - TCharFlags Map_[256]; - - public: - TGrammar(); - const TCharFlags& Get(unsigned char ch) const { - return Map_[ch]; - } - - TCharFlags& GetMutable(unsigned char ch) { - return Map_[ch]; - } - TCharFlags& Add(unsigned char ch, const TCharFlags& val) { - return GetMutable(ch).Add(val); - } - - void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) { - for (unsigned i = lo; i <= hi; ++i) - Add(i, val); - } - void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { - AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask)); - } - - void Add(const TStringBuf& set, const TCharFlags& val) { - for (size_t i = 0; i != set.length(); ++i) - Add(set[i], val); - } - void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { - Add(set, TCharFlags(type, feat, decmask, encmask)); - } - }; - - static const TGrammar& Grammar(); - - protected: - IOutputStream& Out; - const TEncodeMapper FldSrc; - const TEncodeToMapper FldDst; - ui64 OutFlags; - int HexValue; - - protected: - void HexReset() { - HexValue = 0; - } - - void HexDigit(char c) { - HexAdd(c - '0'); - } - void HexUpper(char c) { - HexAdd(c - 'A' + 10); - } - void HexLower(char c) { - HexAdd(c - 'a' + 10); - } - - void HexAdd(int val) { - HexValue <<= 4; - HexValue += val; - } - - protected: - void DoSym(unsigned char ch) { - const int res = FldSrc.EncodeSym(ch); - Do(ch, res); - } - void DoHex(unsigned char ch) { - const int res = FldSrc.EncodeHex(ch); - Do(ch, res); - } - void DoHex() { - DoHex(HexValue); - HexValue = 0; - } - void Do(unsigned char, int); - }; + struct TCharFlags { + ui32 TypeFlags; + ui64 FeatFlags; + ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed + ui32 EncodeFld; // encode if shouldn't be treated as delimiter + TCharFlags(ui64 feat = 0) + : TypeFlags(0) + , FeatFlags(feat) + , DecodeFld(0) + , EncodeFld(0) + { + } + TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) + : TypeFlags(type) + , FeatFlags(feat) + , DecodeFld(decmask) + , EncodeFld(encmask) + { + } + TCharFlags& Add(const TCharFlags& val) { + TypeFlags |= val.TypeFlags; + FeatFlags |= val.FeatFlags; + DecodeFld |= val.DecodeFld; + EncodeFld |= val.EncodeFld; + return *this; + } + bool IsAllowed(ui32 fldmask) const { + return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask); + } + // should we decode an encoded character + bool IsDecode(ui32 fldmask, ui64 flags) const; + }; + + class TEncodeMapperBase { + protected: + TEncodeMapperBase() + : Flags(0) + , FldMask(0) + , Q_DecodeAny(false) + { + } + TEncodeMapperBase(ui64 flags, TField::EField fld) + : Flags(flags) + , FldMask(1u << fld) + , Q_DecodeAny(flags & TFeature::FeatureDecodeANY) + { + } + + protected: + const ui64 Flags; + const ui32 FldMask; + const bool Q_DecodeAny; // this is a special option for username/password + }; + + // maps a sym or hex character and indicates whether it has to be encoded + class TEncodeMapper + : public TEncodeMapperBase { + public: + TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) + : TEncodeMapperBase(flags, fld) + , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus) + { + } + // negative=sym, positive=hex, zero=maybesym + int EncodeSym(unsigned char&) const; + int EncodeHex(unsigned char&) const; + + protected: + const bool Q_EncodeSpcAsPlus; + }; + + // indicates whether a character has to be encoded when copying to a field + class TEncodeToMapper + : public TEncodeMapperBase { + public: + TEncodeToMapper() + : TEncodeMapperBase() + { + } + TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) + : TEncodeMapperBase(flags, fld) + { + } + bool Enabled() const { + return 0 != FldMask; + } + bool Encode(unsigned char) const; + }; + + class TEncoder { + public: + TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper()); + + ui64 ReEncode(const TStringBuf& url); + ui64 ReEncode(const char* str, size_t len) { + return ReEncode(TStringBuf(str, len)); + } + + protected: + static bool IsType(unsigned char c, ui64 flags) { + return GetFlags(c).TypeFlags & flags; + } + + public: + static bool IsDigit(unsigned char c) { + return IsType(c, ECFDigit); + } + static bool IsUpper(unsigned char c) { + return IsType(c, ECFUpper); + } + static bool IsLower(unsigned char c) { + return IsType(c, ECFLower); + } + static bool IsAlpha(unsigned char c) { + return IsType(c, ECGAlpha); + } + static bool IsAlnum(unsigned char c) { + return IsType(c, ECGAlnum); + } + static bool IsUnres(unsigned char c) { + return IsType(c, ECGUnres); + } + static const TCharFlags& GetFlags(unsigned char c) { + return Grammar().Get(c); + } + + public: + // process an encoded string, decoding safe chars and encoding unsafe + static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) { + TEncoder(out, srcfld).ReEncode(val); + return out; + } + static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) { + TEncoder(out, srcfld, dstfld).ReEncode(val); + return out; + } + + // see also UrlUnescape() from string/quote.h + static IOutputStream& Decode( + IOutputStream& out, const TStringBuf& val, ui64 flags) { + return ReEncode(out, val, flags | TFeature::FeatureDecodeANY); + } + + public: + // process a raw string or char, encode as needed + static IOutputStream& Hex(IOutputStream& out, unsigned char val); + static IOutputStream& Encode(IOutputStream& out, unsigned char val) { + out << '%'; + return Hex(out, val); + } + static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val); + static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val); + + static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld); + static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags); + + static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) { + return EncodeField(out, val, TField::FieldAllMAX); + } + + static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) { + return EncodeField(out, val, TField::FieldAllMAX, flags); + } + + public: + class TGrammar { + TCharFlags Map_[256]; + + public: + TGrammar(); + const TCharFlags& Get(unsigned char ch) const { + return Map_[ch]; + } + + TCharFlags& GetMutable(unsigned char ch) { + return Map_[ch]; + } + TCharFlags& Add(unsigned char ch, const TCharFlags& val) { + return GetMutable(ch).Add(val); + } + + void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) { + for (unsigned i = lo; i <= hi; ++i) + Add(i, val); + } + void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { + AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask)); + } + + void Add(const TStringBuf& set, const TCharFlags& val) { + for (size_t i = 0; i != set.length(); ++i) + Add(set[i], val); + } + void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { + Add(set, TCharFlags(type, feat, decmask, encmask)); + } + }; + + static const TGrammar& Grammar(); + + protected: + IOutputStream& Out; + const TEncodeMapper FldSrc; + const TEncodeToMapper FldDst; + ui64 OutFlags; + int HexValue; + + protected: + void HexReset() { + HexValue = 0; + } + + void HexDigit(char c) { + HexAdd(c - '0'); + } + void HexUpper(char c) { + HexAdd(c - 'A' + 10); + } + void HexLower(char c) { + HexAdd(c - 'a' + 10); + } + + void HexAdd(int val) { + HexValue <<= 4; + HexValue += val; + } + + protected: + void DoSym(unsigned char ch) { + const int res = FldSrc.EncodeSym(ch); + Do(ch, res); + } + void DoHex(unsigned char ch) { + const int res = FldSrc.EncodeHex(ch); + Do(ch, res); + } + void DoHex() { + DoHex(HexValue); + HexValue = 0; + } + void Do(unsigned char, int); + }; } - using TEncoder = NEncode::TEncoder; + using TEncoder = NEncode::TEncoder; } diff --git a/library/cpp/uri/http_url.h b/library/cpp/uri/http_url.h index 92a332a1f6..7c8e8d844d 100644 --- a/library/cpp/uri/http_url.h +++ b/library/cpp/uri/http_url.h @@ -2,11 +2,11 @@ #include "uri.h" #include "other.h" - + // XXX: use NUri::TUri directly; this whole file is for backwards compatibility class THttpURL - : public NUri::TUri { + : public NUri::TUri { public: typedef TField::EFlags TFlags; typedef TField::EField TField; @@ -16,42 +16,42 @@ public: public: enum { FeatureUnescapeStandard = TFeature::FeatureDecodeStandard, - FeatureEscSpace = TFeature::FeatureEncodeSpaceAsPlus, - FeatureEscapeUnescaped = TFeature::FeatureEncodeExtendedASCII, - FeatureNormalPath = TFeature::FeaturePathStripRootParent, + FeatureEscSpace = TFeature::FeatureEncodeSpaceAsPlus, + FeatureEscapeUnescaped = TFeature::FeatureEncodeExtendedASCII, + FeatureNormalPath = TFeature::FeaturePathStripRootParent, }; public: THttpURL(unsigned defaultPort = 80) : TUri(defaultPort) - { - } + { + } - THttpURL(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0) + THttpURL(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0) : TUri(host, port, path, query, scheme, defaultPort) - { - } + { + } THttpURL(const TUri& url) : TUri(url) - { - } + { + } public: // XXX: don't use any of these legacy methods below public: // use TUri::GetField() instead /// will return null-terminated if fld is not dirty - const char* Get(EField fld) const { + const char* Get(EField fld) const { return GetField(fld).data(); } public: // use TUriUpdate class so that Rewrite() is only called once - void Set(EField field, const TStringBuf& value) { + void Set(EField field, const TStringBuf& value) { if (SetInMemory(field, value)) Rewrite(); } template <size_t size> - void Set(EField field, const char (&value)[size]) { + void Set(EField field, const char (&value)[size]) { if (SetInMemory(field, value)) Rewrite(); } @@ -59,19 +59,19 @@ public: // use TUriUpdate class so that Rewrite() is only called once public: // use TUri::FldXXX methods for better control // Partial quick set of the field, can be called for // multiple fields - bool SetInMemory(EField field, const TStringBuf& value) { + bool SetInMemory(EField field, const TStringBuf& value) { return FldMemSet(field, value); } // clears a field - void Reset(EField field) { + void Reset(EField field) { FldClr(field); } }; -static inline const char* HttpURLParsedStateToString(const NUri::TState::EParsed& t) { +static inline const char* HttpURLParsedStateToString(const NUri::TState::EParsed& t) { return NUri::ParsedStateToString(t); } -static inline const char* HttpUrlSchemeKindToString(const NUri::TScheme::EKind& t) { +static inline const char* HttpUrlSchemeKindToString(const NUri::TScheme::EKind& t) { return NUri::SchemeKindToString(t); } diff --git a/library/cpp/uri/location.cpp b/library/cpp/uri/location.cpp index 2eb099beda..a6a4d11ffa 100644 --- a/library/cpp/uri/location.cpp +++ b/library/cpp/uri/location.cpp @@ -2,30 +2,30 @@ #include "uri.h" namespace NUri { - static const int URI_PARSE_FLAGS = - (TFeature::FeaturesRecommended | TFeature::FeatureConvertHostIDN | TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureEncodePercent) & ~TFeature::FeatureHashBangToEscapedFragment; + static const int URI_PARSE_FLAGS = + (TFeature::FeaturesRecommended | TFeature::FeatureConvertHostIDN | TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureEncodePercent) & ~TFeature::FeatureHashBangToEscapedFragment; - TString ResolveRedirectLocation(const TStringBuf& baseUrl, - const TStringBuf& location) { - TUri baseUri; - TUri locationUri; + TString ResolveRedirectLocation(const TStringBuf& baseUrl, + const TStringBuf& location) { + TUri baseUri; + TUri locationUri; - // Parse base URL. - if (baseUri.Parse(baseUrl, URI_PARSE_FLAGS) != NUri::TState::ParsedOK) { - return ""; - } - // Parse location with respect to the base URL. - if (locationUri.Parse(location, baseUri, URI_PARSE_FLAGS) != NUri::TState::ParsedOK) { - return ""; - } - // Inherit fragment. - if (!locationUri.GetField(NUri::TField::FieldFragment)) { - NUri::TUriUpdate update(locationUri); - update.Set(NUri::TField::FieldFragment, baseUri.GetField(NUri::TField::FieldFragment)); - } - TString res; - locationUri.Print(res, NUri::TField::FlagAllFields); - return res; + // Parse base URL. + if (baseUri.Parse(baseUrl, URI_PARSE_FLAGS) != NUri::TState::ParsedOK) { + return ""; + } + // Parse location with respect to the base URL. + if (locationUri.Parse(location, baseUri, URI_PARSE_FLAGS) != NUri::TState::ParsedOK) { + return ""; + } + // Inherit fragment. + if (!locationUri.GetField(NUri::TField::FieldFragment)) { + NUri::TUriUpdate update(locationUri); + update.Set(NUri::TField::FieldFragment, baseUri.GetField(NUri::TField::FieldFragment)); + } + TString res; + locationUri.Print(res, NUri::TField::FlagAllFields); + return res; } } diff --git a/library/cpp/uri/location.h b/library/cpp/uri/location.h index 9478510fd3..0f533fe0b5 100644 --- a/library/cpp/uri/location.h +++ b/library/cpp/uri/location.h @@ -3,11 +3,11 @@ #include <util/generic/string.h> namespace NUri { - /** + /** * Resolve Location header according to https://tools.ietf.org/html/rfc7231#section-7.1.2 * * @return Resolved location's url or empty string in case of any error. */ - TString ResolveRedirectLocation(const TStringBuf& baseUrl, const TStringBuf& location); + TString ResolveRedirectLocation(const TStringBuf& baseUrl, const TStringBuf& location); } diff --git a/library/cpp/uri/location_ut.cpp b/library/cpp/uri/location_ut.cpp index e3140fffff..26a0f64471 100644 --- a/library/cpp/uri/location_ut.cpp +++ b/library/cpp/uri/location_ut.cpp @@ -5,36 +5,36 @@ Y_UNIT_TEST_SUITE(TResolveRedirectTests) { Y_UNIT_TEST(Absolute) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub"), "http://redir-example.com/sub"); + NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub"), "http://redir-example.com/sub"); } Y_UNIT_TEST(AbsWithFragment) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub#Hello"), "http://redir-example.com/sub#Hello"); + NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub#Hello"), "http://redir-example.com/sub#Hello"); UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com/#Hello", "http://redir-example.com/sub"), "http://redir-example.com/sub#Hello"); + NUri::ResolveRedirectLocation("http://example.com/#Hello", "http://redir-example.com/sub"), "http://redir-example.com/sub#Hello"); } Y_UNIT_TEST(Rel) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", "/sub"), "http://example.com/sub"); + NUri::ResolveRedirectLocation("http://example.com", "/sub"), "http://example.com/sub"); } Y_UNIT_TEST(RelWithFragment) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", "/sub#Hello"), "http://example.com/sub#Hello"); + NUri::ResolveRedirectLocation("http://example.com", "/sub#Hello"), "http://example.com/sub#Hello"); UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com/#Hello", "/sub"), "http://example.com/sub#Hello"); + NUri::ResolveRedirectLocation("http://example.com/#Hello", "/sub"), "http://example.com/sub#Hello"); } Y_UNIT_TEST(WrongLocation) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", ""), ""); + NUri::ResolveRedirectLocation("http://example.com", ""), ""); } Y_UNIT_TEST(WrongBase) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("", "http://example.com"), ""); + NUri::ResolveRedirectLocation("", "http://example.com"), ""); } Y_UNIT_TEST(HashBangIsNothingSpecial) { UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub#!Hello"), "http://redir-example.com/sub#!Hello"); + NUri::ResolveRedirectLocation("http://example.com", "http://redir-example.com/sub#!Hello"), "http://redir-example.com/sub#!Hello"); UNIT_ASSERT_EQUAL( - NUri::ResolveRedirectLocation("http://example.com/#!Hello", "http://redir-example.com/sub"), "http://redir-example.com/sub#!Hello"); + NUri::ResolveRedirectLocation("http://example.com/#!Hello", "http://redir-example.com/sub"), "http://redir-example.com/sub#!Hello"); } } diff --git a/library/cpp/uri/other.cpp b/library/cpp/uri/other.cpp index 7e02d70e25..b23a5b68a9 100644 --- a/library/cpp/uri/other.cpp +++ b/library/cpp/uri/other.cpp @@ -1,35 +1,35 @@ #include "other.h" - + #include <util/string/util.h> #include <util/system/yassert.h> - + /********************************************************/ /********************************************************/ static const Tr InvertTr(".:/?#", "\005\004\003\002\001"); static const Tr RevertTr("\005\004\003\002\001", ".:/?#"); -void TrspChars(char* s) { +void TrspChars(char* s) { InvertTr.Do(s); } -void UnTrspChars(char* s) { +void UnTrspChars(char* s) { RevertTr.Do(s); } -void TrspChars(char* s, size_t l) { +void TrspChars(char* s, size_t l) { InvertTr.Do(s, l); } -void UnTrspChars(char* s, size_t l) { +void UnTrspChars(char* s, size_t l) { RevertTr.Do(s, l); } -void TrspChars(const char* s, char* d) { +void TrspChars(const char* s, char* d) { InvertTr.Do(s, d); } -void UnTrspChars(const char* s, char* d) { +void UnTrspChars(const char* s, char* d) { RevertTr.Do(s, d); } @@ -57,14 +57,14 @@ void InvertDomain(char* begin, char* end) { e = end; for (size_t i = 0, n = e - b; i < n / 2; ++i) DoSwap(b[i], b[n - i - 1]); - if (e == end) + if (e == end) break; b = e + 1; } } void InvertUrl(char* begin, char* end) { - char* slash = strchr(begin, '/'); + char* slash = strchr(begin, '/'); if (slash) { *slash = 0; } diff --git a/library/cpp/uri/other.h b/library/cpp/uri/other.h index 621d89a32f..7aec22e77b 100644 --- a/library/cpp/uri/other.h +++ b/library/cpp/uri/other.h @@ -5,12 +5,12 @@ // Some functions for inverted url representation // No scheme cut-off, no 80th port normalization -void TrspChars(char* s); -void UnTrspChars(char* s); -void TrspChars(char* s, size_t l); -void UnTrspChars(char* s, size_t l); -void TrspChars(const char* s, char* d); -void UnTrspChars(const char* s, char* d); +void TrspChars(char* s); +void UnTrspChars(char* s); +void TrspChars(char* s, size_t l); +void UnTrspChars(char* s, size_t l); +void TrspChars(const char* s, char* d); +void UnTrspChars(const char* s, char* d); void InvertDomain(char* begin, char* end); diff --git a/library/cpp/uri/parse.cpp b/library/cpp/uri/parse.cpp index 76d8a9fee3..1db4e008c4 100644 --- a/library/cpp/uri/parse.cpp +++ b/library/cpp/uri/parse.cpp @@ -1,207 +1,207 @@ -#include "parse.h" +#include "parse.h" #include "common.h" #include "encode.h" namespace NUri { - const TParseFlags TParser::FieldFlags[] = - { - TParseFlags(0 // FieldScheme - | TFeature::FeatureToLower, - 0) - - , - TParseFlags(0 // FieldUsername - | TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent, - 0 | TFeature::FeatureToLower) - - , - TParseFlags(0 // FieldPassword - | TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent, - 0 | TFeature::FeatureToLower) - - , - TParseFlags(0 // FieldHost - | TFeature::FeatureToLower | TFeature::FeatureUpperEncoded | (TFeature::FeaturesMaybeEncode & ~TFeature::FeatureEncodeExtendedDelim), - 0 | TFeature::FeaturesMaybeDecode) - - , - TParseFlags(0 // FieldPort - , - 0) - - , - TParseFlags(0 // FieldPath - | TFeature::FeaturesEncodePChar | TFeature::FeaturePathOperation, - 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus) - - , - TParseFlags(0 // FieldQuery - | TFeature::FeaturesEncodePChar | TFeature::FeatureEncodeSpaceAsPlus, - 0 | TFeature::FeatureToLower) - - , - TParseFlags(0 // FieldFragment - | TFeature::FeaturesEncodePChar, - 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus)}; - - namespace NParse { - void TRange::AddRange(const TRange& range, ui64 mask) { - FlagsAllPlaintext |= range.FlagsAllPlaintext; - // update only if flags apply here - mask &= range.FlagsEncodeMasked; - if (0 == mask) - return; - FlagsEncodeMasked |= mask; - if (mask & TFeature::FeaturesMaybeEncode) - Encode += range.Encode; - if (mask & TFeature::FeaturesDecode) - Decode += range.Decode; - } - - } - - void TParser::copyRequirementsImpl(const char* ptr) { - Y_ASSERT(0 != CurRange.FlagsAllPlaintext); - Y_UNUSED(ptr); + const TParseFlags TParser::FieldFlags[] = + { + TParseFlags(0 // FieldScheme + | TFeature::FeatureToLower, + 0) + + , + TParseFlags(0 // FieldUsername + | TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent, + 0 | TFeature::FeatureToLower) + + , + TParseFlags(0 // FieldPassword + | TFeature::FeatureDecodeANY | TFeature::FeaturesDecode | TFeature::FeatureEncodePercent, + 0 | TFeature::FeatureToLower) + + , + TParseFlags(0 // FieldHost + | TFeature::FeatureToLower | TFeature::FeatureUpperEncoded | (TFeature::FeaturesMaybeEncode & ~TFeature::FeatureEncodeExtendedDelim), + 0 | TFeature::FeaturesMaybeDecode) + + , + TParseFlags(0 // FieldPort + , + 0) + + , + TParseFlags(0 // FieldPath + | TFeature::FeaturesEncodePChar | TFeature::FeaturePathOperation, + 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus) + + , + TParseFlags(0 // FieldQuery + | TFeature::FeaturesEncodePChar | TFeature::FeatureEncodeSpaceAsPlus, + 0 | TFeature::FeatureToLower) + + , + TParseFlags(0 // FieldFragment + | TFeature::FeaturesEncodePChar, + 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus)}; + + namespace NParse { + void TRange::AddRange(const TRange& range, ui64 mask) { + FlagsAllPlaintext |= range.FlagsAllPlaintext; + // update only if flags apply here + mask &= range.FlagsEncodeMasked; + if (0 == mask) + return; + FlagsEncodeMasked |= mask; + if (mask & TFeature::FeaturesMaybeEncode) + Encode += range.Encode; + if (mask & TFeature::FeaturesDecode) + Decode += range.Decode; + } + + } + + void TParser::copyRequirementsImpl(const char* ptr) { + Y_ASSERT(0 != CurRange.FlagsAllPlaintext); + Y_UNUSED(ptr); #ifdef DO_PRN - PrintHead(ptr, __FUNCTION__) + PrintHead(ptr, __FUNCTION__) << " all=[" << IntToString<16>(CurRange.FlagsAllPlaintext) << "] enc=[" << IntToString<16>(CurRange.FlagsEncodeMasked) - << " & " << IntToString<16>(Flags.Allow | Flags.Extra) << "]"; - PrintTail(CurRange.Beg, ptr); + << " & " << IntToString<16>(Flags.Allow | Flags.Extra) << "]"; + PrintTail(CurRange.Beg, ptr); #endif - for (int i = 0; i < TField::FieldUrlMAX; ++i) { - const TField::EField fld = TField::EField(i); - TSection& section = Sections[fld]; - // update only sections in progress - if (nullptr == section.Beg) - continue; - // and overlapping with the range - if (nullptr != section.End && section.End < CurRange.Beg) - continue; -#ifdef DO_PRN - PrintHead(ptr, __FUNCTION__, fld) - << " all=[" << IntToString<16>(CurRange.FlagsAllPlaintext) - << "] enc=[" << IntToString<16>(CurRange.FlagsEncodeMasked) - << " & " << IntToString<16>(GetFieldFlags(fld)) << "]"; - PrintTail(section.Beg, ptr); -#endif - section.AddRange(CurRange, GetFieldFlags(fld)); - } - CurRange.Reset(); + for (int i = 0; i < TField::FieldUrlMAX; ++i) { + const TField::EField fld = TField::EField(i); + TSection& section = Sections[fld]; + // update only sections in progress + if (nullptr == section.Beg) + continue; + // and overlapping with the range + if (nullptr != section.End && section.End < CurRange.Beg) + continue; +#ifdef DO_PRN + PrintHead(ptr, __FUNCTION__, fld) + << " all=[" << IntToString<16>(CurRange.FlagsAllPlaintext) + << "] enc=[" << IntToString<16>(CurRange.FlagsEncodeMasked) + << " & " << IntToString<16>(GetFieldFlags(fld)) << "]"; + PrintTail(section.Beg, ptr); +#endif + section.AddRange(CurRange, GetFieldFlags(fld)); + } + CurRange.Reset(); } - void TParser::PctEndImpl(const char* ptr) { + void TParser::PctEndImpl(const char* ptr) { #ifdef DO_PRN - PrintHead(PctBegin, __FUNCTION__); - PrintTail(PctBegin, ptr); + PrintHead(PctBegin, __FUNCTION__); + PrintTail(PctBegin, ptr); #else - Y_UNUSED(ptr); + Y_UNUSED(ptr); #endif - setRequirement(PctBegin, TEncoder::GetFlags('%').FeatFlags); - PctBegin = nullptr; - } + setRequirement(PctBegin, TEncoder::GetFlags('%').FeatFlags); + PctBegin = nullptr; + } - void TParser::HexSet(const char* ptr) { - Y_ASSERT(nullptr != PctBegin); + void TParser::HexSet(const char* ptr) { + Y_ASSERT(nullptr != PctBegin); #ifdef DO_PRN - PrintHead(ptr, __FUNCTION__); - PrintTail(PctBegin, ptr + 1); + PrintHead(ptr, __FUNCTION__); + PrintTail(PctBegin, ptr + 1); #endif - PctBegin = nullptr; - const unsigned char ch = HexValue; - ui64 flags = TEncoder::GetFlags('%').FeatFlags | TEncoder::GetFlags(ch).FeatFlags; + PctBegin = nullptr; + const unsigned char ch = HexValue; + ui64 flags = TEncoder::GetFlags('%').FeatFlags | TEncoder::GetFlags(ch).FeatFlags; - setRequirementExcept(ptr, flags, TFeature::FeaturesMaybeEncode); - } + setRequirementExcept(ptr, flags, TFeature::FeaturesMaybeEncode); + } - TState::EParsed TParser::ParseImpl() { + TState::EParsed TParser::ParseImpl() { #ifdef DO_PRN - PrintHead(UriStr.data(), "[Parsing]") << "URL"; - PrintTail(UriStr); + PrintHead(UriStr.data(), "[Parsing]") << "URL"; + PrintTail(UriStr); #endif - const bool ok = doParse(UriStr.data(), UriStr.length()); + const bool ok = doParse(UriStr.data(), UriStr.length()); #ifdef DO_PRN - Cdbg << (ok ? "[Parsed]" : "[Failed]"); - for (int idx = 0; idx < TField::FieldUrlMAX; ++idx) { - const TSection& section = Sections[idx]; - if (section.IsSet()) - Cdbg << ' ' << TField::EField(idx) << "=[" << section.Get() << ']'; - } - Cdbg << Endl; + Cdbg << (ok ? "[Parsed]" : "[Failed]"); + for (int idx = 0; idx < TField::FieldUrlMAX; ++idx) { + const TSection& section = Sections[idx]; + if (section.IsSet()) + Cdbg << ' ' << TField::EField(idx) << "=[" << section.Get() << ']'; + } + Cdbg << Endl; #endif - if (!ok) { - if (!(Flags & TFeature::FeatureTryToFix) || !Sections[TField::FieldFrag].Beg) - return TState::ParsedBadFormat; - //Here: error was in fragment, just ignore it - ResetSection(TField::FieldFrag); - } - - if ((Flags & TFeature::FeatureDenyNetworkPath) && IsNetPath()) - return TState::ParsedBadFormat; - - const TSection& scheme = Sections[TField::FieldScheme]; - Scheme = scheme.IsSet() ? TSchemeInfo::GetKind(scheme.Get()) : TScheme::SchemeEmpty; - const TSchemeInfo& schemeInfo = TSchemeInfo::Get(Scheme); - - if (IsRootless()) { - // opaque case happens - if (schemeInfo.FldReq & TField::FlagHost) - return TState::ParsedBadFormat; + if (!ok) { + if (!(Flags & TFeature::FeatureTryToFix) || !Sections[TField::FieldFrag].Beg) + return TState::ParsedBadFormat; + //Here: error was in fragment, just ignore it + ResetSection(TField::FieldFrag); + } - if (TScheme::SchemeEmpty == Scheme) - return TState::ParsedBadScheme; + if ((Flags & TFeature::FeatureDenyNetworkPath) && IsNetPath()) + return TState::ParsedBadFormat; - if (Flags & TFeature::FeatureAllowRootless) - return TState::ParsedOK; + const TSection& scheme = Sections[TField::FieldScheme]; + Scheme = scheme.IsSet() ? TSchemeInfo::GetKind(scheme.Get()) : TScheme::SchemeEmpty; + const TSchemeInfo& schemeInfo = TSchemeInfo::Get(Scheme); - if (!(Flags & TFeature::FeatureSchemeFlexible)) - return TState::ParsedBadScheme; + if (IsRootless()) { + // opaque case happens + if (schemeInfo.FldReq & TField::FlagHost) + return TState::ParsedBadFormat; - return TState::ParsedRootless; - } + if (TScheme::SchemeEmpty == Scheme) + return TState::ParsedBadScheme; - checkSectionCollision(TField::FieldUser, TField::FieldHost); - checkSectionCollision(TField::FieldPass, TField::FieldPort); + if (Flags & TFeature::FeatureAllowRootless) + return TState::ParsedOK; - if (0 == (Flags & TFeature::FeatureAuthSupported)) - if (Sections[TField::FieldUser].IsSet() || Sections[TField::FieldPass].IsSet()) - return TState::ParsedBadAuth; + if (!(Flags & TFeature::FeatureSchemeFlexible)) + return TState::ParsedBadScheme; - TSection& host = Sections[TField::FieldHost]; - if (host.IsSet()) - for (; host.End != host.Beg && '.' == host.End[-1];) - --host.End; + return TState::ParsedRootless; + } - if (scheme.IsSet()) { - ui64 wantCareFlags = 0; - switch (Scheme) { - case TScheme::SchemeHTTP: - break; - case TScheme::SchemeEmpty: - Scheme = TScheme::SchemeUnknown; + checkSectionCollision(TField::FieldUser, TField::FieldHost); + checkSectionCollision(TField::FieldPass, TField::FieldPort); + + if (0 == (Flags & TFeature::FeatureAuthSupported)) + if (Sections[TField::FieldUser].IsSet() || Sections[TField::FieldPass].IsSet()) + return TState::ParsedBadAuth; + + TSection& host = Sections[TField::FieldHost]; + if (host.IsSet()) + for (; host.End != host.Beg && '.' == host.End[-1];) + --host.End; + + if (scheme.IsSet()) { + ui64 wantCareFlags = 0; + switch (Scheme) { + case TScheme::SchemeHTTP: + break; + case TScheme::SchemeEmpty: + Scheme = TScheme::SchemeUnknown; [[fallthrough]]; - case TScheme::SchemeUnknown: - wantCareFlags = - TFeature::FeatureSchemeFlexible | TFeature::FeatureNoRelPath; - break; - default: - wantCareFlags = - TFeature::FeatureSchemeFlexible | TFeature::FeatureSchemeKnown; - break; - } - - if (0 != wantCareFlags && 0 == (Flags & wantCareFlags)) - return TState::ParsedBadScheme; - if ((schemeInfo.FldReq & TField::FlagHost) || (Flags & TFeature::FeatureRemoteOnly)) - if (!host.IsSet() || 0 == host.Len()) - return TState::ParsedBadFormat; + case TScheme::SchemeUnknown: + wantCareFlags = + TFeature::FeatureSchemeFlexible | TFeature::FeatureNoRelPath; + break; + default: + wantCareFlags = + TFeature::FeatureSchemeFlexible | TFeature::FeatureSchemeKnown; + break; + } + + if (0 != wantCareFlags && 0 == (Flags & wantCareFlags)) + return TState::ParsedBadScheme; + if ((schemeInfo.FldReq & TField::FlagHost) || (Flags & TFeature::FeatureRemoteOnly)) + if (!host.IsSet() || 0 == host.Len()) + return TState::ParsedBadFormat; } - return TState::ParsedOK; + return TState::ParsedOK; } } diff --git a/library/cpp/uri/parse.h b/library/cpp/uri/parse.h index f4b56ba1c4..ca2358e572 100644 --- a/library/cpp/uri/parse.h +++ b/library/cpp/uri/parse.h @@ -13,349 +13,349 @@ #include <util/system/yassert.h> namespace NUri { - class TParser; - - namespace NParse { - class TRange { - public: - const char* Beg; - ui64 FlagsEncodeMasked; - ui64 FlagsAllPlaintext; - ui32 Encode; - ui32 Decode; - - public: - TRange(const char* beg = nullptr) - : Beg(beg) - , FlagsEncodeMasked(0) - , FlagsAllPlaintext(0) - , Encode(0) - , Decode(0) - { - } - - void Reset(const char* beg = nullptr) { - *this = TRange(beg); - } - - void AddRange(const TRange& range, ui64 mask); - - void AddFlag(const char* ptr, ui64 mask, ui64 flag) { - if (0 != flag) - AddFlagImpl(ptr, mask, flag, flag); - } - - void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) { - if (0 != flag) - AddFlagImpl(ptr, mask, flag & ~exclflag, flag); - } - - void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) { - if (0 != flag) - AddFlagImpl(ptr, mask, flag, flag, exclmask); - } - - void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) { - if (0 != flag) - AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask); - } - - private: - void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) { - AddFlagAllPlaintextImpl(ptr, plainflag); - AddFlagEncodeMaskedImpl(encflag & mask); - } - - void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) { - AddFlagAllPlaintextImpl(ptr, plainflag); - if (0 == (mask & exclmask)) - AddFlagEncodeMaskedImpl(encflag & mask); - } - - void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) { - if (nullptr == Beg) - Beg = ptr; - FlagsAllPlaintext |= flag; - } - - void AddFlagEncodeMaskedImpl(ui64 flag) { - if (0 == flag) - return; - FlagsEncodeMasked |= flag; - if (flag & TFeature::FeaturesMaybeEncode) - ++Encode; - else if (flag & TFeature::FeaturesDecode) - ++Decode; - } - }; + class TParser; + + namespace NParse { + class TRange { + public: + const char* Beg; + ui64 FlagsEncodeMasked; + ui64 FlagsAllPlaintext; + ui32 Encode; + ui32 Decode; + + public: + TRange(const char* beg = nullptr) + : Beg(beg) + , FlagsEncodeMasked(0) + , FlagsAllPlaintext(0) + , Encode(0) + , Decode(0) + { + } + + void Reset(const char* beg = nullptr) { + *this = TRange(beg); + } + + void AddRange(const TRange& range, ui64 mask); + + void AddFlag(const char* ptr, ui64 mask, ui64 flag) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag, flag); + } + + void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag & ~exclflag, flag); + } + + void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag, flag, exclmask); + } + + void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask); + } + + private: + void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) { + AddFlagAllPlaintextImpl(ptr, plainflag); + AddFlagEncodeMaskedImpl(encflag & mask); + } + + void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) { + AddFlagAllPlaintextImpl(ptr, plainflag); + if (0 == (mask & exclmask)) + AddFlagEncodeMaskedImpl(encflag & mask); + } + + void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) { + if (nullptr == Beg) + Beg = ptr; + FlagsAllPlaintext |= flag; + } + + void AddFlagEncodeMaskedImpl(ui64 flag) { + if (0 == flag) + return; + FlagsEncodeMasked |= flag; + if (flag & TFeature::FeaturesMaybeEncode) + ++Encode; + else if (flag & TFeature::FeaturesDecode) + ++Decode; + } + }; } - class TSection - : protected NParse::TRange { - private: - friend class TParser; - - private: - const char* End; - - TSection(const char* beg = nullptr) - : NParse::TRange(beg) - , End(nullptr) - { - } - - void Reset() { - Enter(nullptr); - } - - void Reset(const char* pc) { - Y_ASSERT(!Beg || !pc || Beg < pc); - Reset(); - } - - void Enter(const char* pc) { - *this = TSection(pc); - } - - bool Leave(const char* pc) { - Y_ASSERT(Beg); - End = pc; - return true; - } - - void Set(const TStringBuf& buf) { - Enter(buf.data()); - Leave(buf.data() + buf.length()); - } - - public: - bool IsSet() const { - return End; - } - - TStringBuf Get() const { - return TStringBuf(Beg, End); - } - - size_t Len() const { - return End - Beg; - } - - size_t DecodedLen() const { - return Len() - 2 * Decode; - } - - size_t EncodedLen() const { - return 2 * Encode + DecodedLen(); - } - - ui32 GetEncode() const { - return Encode; - } - - ui32 GetDecode() const { - return Decode; - } - - ui64 GetFlagsEncode() const { - return FlagsEncodeMasked; - } - - ui64 GetFlagsAllPlaintext() const { - return FlagsAllPlaintext; - } - }; - - class TParser { - public: - TSection Sections[TField::FieldUrlMAX]; - TScheme::EKind Scheme; - const TParseFlags Flags; - const TStringBuf UriStr; - TState::EParsed State; - ECharset Enc; - - public: - TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8) - : Scheme(TScheme::SchemeEmpty) - , Flags(flags | TFeature::FeatureDecodeANY) - , UriStr(uri) - , State(TState::ParsedEmpty) - , Enc(enc) - , HexValue(0) - , PctBegin(nullptr) - { - Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation) - // can't define all of them - || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath)); - State = ParseImpl(); - } - - public: - const TSection& Get(TField::EField fld) const { - return Sections[fld]; - } - TSection& GetMutable(TField::EField fld) { - return Sections[fld]; - } - bool Has(TField::EField fld) const { - return Get(fld).IsSet(); - } - bool IsNetPath() const { - return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1]; - } - bool IsRootless() const { - return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]); - } - // for RFC 2396 compatibility - bool IsOpaque() const { - return IsRootless(); - } - static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) { - return FieldFlags[fld] & flags; - } - ui64 GetFieldFlags(TField::EField fld) const { - return GetFieldFlags(fld, Flags); - } - - protected: - static const TParseFlags FieldFlags[TField::FieldUrlMAX]; - TSection::TRange CurRange; - unsigned HexValue; - const char* PctBegin; + class TSection + : protected NParse::TRange { + private: + friend class TParser; + + private: + const char* End; + + TSection(const char* beg = nullptr) + : NParse::TRange(beg) + , End(nullptr) + { + } + + void Reset() { + Enter(nullptr); + } + + void Reset(const char* pc) { + Y_ASSERT(!Beg || !pc || Beg < pc); + Reset(); + } + + void Enter(const char* pc) { + *this = TSection(pc); + } + + bool Leave(const char* pc) { + Y_ASSERT(Beg); + End = pc; + return true; + } + + void Set(const TStringBuf& buf) { + Enter(buf.data()); + Leave(buf.data() + buf.length()); + } + + public: + bool IsSet() const { + return End; + } + + TStringBuf Get() const { + return TStringBuf(Beg, End); + } + + size_t Len() const { + return End - Beg; + } + + size_t DecodedLen() const { + return Len() - 2 * Decode; + } + + size_t EncodedLen() const { + return 2 * Encode + DecodedLen(); + } + + ui32 GetEncode() const { + return Encode; + } + + ui32 GetDecode() const { + return Decode; + } + + ui64 GetFlagsEncode() const { + return FlagsEncodeMasked; + } + + ui64 GetFlagsAllPlaintext() const { + return FlagsAllPlaintext; + } + }; + + class TParser { + public: + TSection Sections[TField::FieldUrlMAX]; + TScheme::EKind Scheme; + const TParseFlags Flags; + const TStringBuf UriStr; + TState::EParsed State; + ECharset Enc; + + public: + TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8) + : Scheme(TScheme::SchemeEmpty) + , Flags(flags | TFeature::FeatureDecodeANY) + , UriStr(uri) + , State(TState::ParsedEmpty) + , Enc(enc) + , HexValue(0) + , PctBegin(nullptr) + { + Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation) + // can't define all of them + || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath)); + State = ParseImpl(); + } + + public: + const TSection& Get(TField::EField fld) const { + return Sections[fld]; + } + TSection& GetMutable(TField::EField fld) { + return Sections[fld]; + } + bool Has(TField::EField fld) const { + return Get(fld).IsSet(); + } + bool IsNetPath() const { + return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1]; + } + bool IsRootless() const { + return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]); + } + // for RFC 2396 compatibility + bool IsOpaque() const { + return IsRootless(); + } + static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) { + return FieldFlags[fld] & flags; + } + ui64 GetFieldFlags(TField::EField fld) const { + return GetFieldFlags(fld, Flags); + } + + protected: + static const TParseFlags FieldFlags[TField::FieldUrlMAX]; + TSection::TRange CurRange; + unsigned HexValue; + const char* PctBegin; #ifdef DO_PRN - IOutputStream& PrintAddr(const char* ptr) const { - return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] "; - } - - IOutputStream& PrintHead(const char* ptr, const char* func) const { - return PrintAddr(ptr) << func << " "; - } - - IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const { - return PrintHead(ptr, func) << fld; - } - - IOutputStream& PrintTail(const TStringBuf& val) const { - return Cdbg << " [" << val << "]" << Endl; - } - IOutputStream& PrintTail(const char* beg, const char* end) const { - return PrintTail(TStringBuf(beg, end)); - } + IOutputStream& PrintAddr(const char* ptr) const { + return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] "; + } + + IOutputStream& PrintHead(const char* ptr, const char* func) const { + return PrintAddr(ptr) << func << " "; + } + + IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const { + return PrintHead(ptr, func) << fld; + } + + IOutputStream& PrintTail(const TStringBuf& val) const { + return Cdbg << " [" << val << "]" << Endl; + } + IOutputStream& PrintTail(const char* beg, const char* end) const { + return PrintTail(TStringBuf(beg, end)); + } #endif - void ResetSection(TField::EField fld, const char* pc = nullptr) { + void ResetSection(TField::EField fld, const char* pc = nullptr) { #ifdef DO_PRN - PrintHead(pc, __FUNCTION__, fld); - PrintTail(pc); + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); #endif - Sections[fld].Reset(pc); - } + Sections[fld].Reset(pc); + } - void storeSection(const TStringBuf& val, TField::EField fld) { + void storeSection(const TStringBuf& val, TField::EField fld) { #ifdef DO_PRN - PrintHead(val.data(), __FUNCTION__, fld); - PrintTail(val); + PrintHead(val.data(), __FUNCTION__, fld); + PrintTail(val); #endif - Sections[fld].Set(val); - } + Sections[fld].Set(val); + } - void startSection(const char* pc, TField::EField fld) { + void startSection(const char* pc, TField::EField fld) { #ifdef DO_PRN - PrintHead(pc, __FUNCTION__, fld); - PrintTail(pc); + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); #endif - copyRequirements(pc); - Sections[fld].Enter(pc); - } + copyRequirements(pc); + Sections[fld].Enter(pc); + } - void finishSection(const char* pc, TField::EField fld) { + void finishSection(const char* pc, TField::EField fld) { #ifdef DO_PRN - PrintHead(pc, __FUNCTION__, fld); - PrintTail(pc); + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); #endif - if (Sections[fld].Leave(pc)) - copyRequirements(pc); - } + if (Sections[fld].Leave(pc)) + copyRequirements(pc); + } - void setRequirement(const char* ptr, ui64 flags) { + void setRequirement(const char* ptr, ui64 flags) { #ifdef DO_PRN - PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) - << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); - PrintTail(ptr); + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); #endif - CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags); - } + CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags); + } - void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) { + void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) { #ifdef DO_PRN - PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) - << " & exclflag=" << IntToString<16>(exclflag) - << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); - PrintTail(ptr); + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & exclflag=" << IntToString<16>(exclflag) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); #endif - CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag); - } + CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag); + } - void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) { + void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) { #ifdef DO_PRN - PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) - << " & exclmask=" << IntToString<16>(exclmask) - << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); - PrintTail(ptr); + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & exclmask=" << IntToString<16>(exclmask) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); #endif - CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask); - } - - void copyRequirementsImpl(const char* ptr); - void copyRequirements(const char* ptr) { - PctEnd(ptr); - if (nullptr != CurRange.Beg && CurRange.Beg != ptr) - copyRequirementsImpl(ptr); - } - - void HexDigit(const char* ptr, char c) { - Y_UNUSED(ptr); - HexAdd(c - '0'); - } - void HexUpper(const char* ptr, char c) { - setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded); - HexAdd(c - 'A' + 10); - } - void HexLower(const char* ptr, char c) { - setRequirement(ptr, TFeature::FeatureUpperEncoded); - HexAdd(c - 'a' + 10); - } - void HexAdd(unsigned val) { - HexValue <<= 4; - HexValue += val; - } - void HexReset() { - HexValue = 0; - } - void HexSet(const char* ptr); - - void PctEndImpl(const char* ptr); - void PctEnd(const char* ptr) { - if (nullptr != PctBegin && ptr != PctBegin) - PctEndImpl(ptr); - } - void PctBeg(const char* ptr) { - PctEnd(ptr); - HexReset(); - PctBegin = ptr; - } - - void checkSectionCollision(TField::EField fld1, TField::EField fld2) { - if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) { - Sections[fld1].Reset(); - } - } - - bool doParse(const char* str_beg, size_t length); - TState::EParsed ParseImpl(); - }; + CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask); + } + + void copyRequirementsImpl(const char* ptr); + void copyRequirements(const char* ptr) { + PctEnd(ptr); + if (nullptr != CurRange.Beg && CurRange.Beg != ptr) + copyRequirementsImpl(ptr); + } + + void HexDigit(const char* ptr, char c) { + Y_UNUSED(ptr); + HexAdd(c - '0'); + } + void HexUpper(const char* ptr, char c) { + setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded); + HexAdd(c - 'A' + 10); + } + void HexLower(const char* ptr, char c) { + setRequirement(ptr, TFeature::FeatureUpperEncoded); + HexAdd(c - 'a' + 10); + } + void HexAdd(unsigned val) { + HexValue <<= 4; + HexValue += val; + } + void HexReset() { + HexValue = 0; + } + void HexSet(const char* ptr); + + void PctEndImpl(const char* ptr); + void PctEnd(const char* ptr) { + if (nullptr != PctBegin && ptr != PctBegin) + PctEndImpl(ptr); + } + void PctBeg(const char* ptr) { + PctEnd(ptr); + HexReset(); + PctBegin = ptr; + } + + void checkSectionCollision(TField::EField fld1, TField::EField fld2) { + if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) { + Sections[fld1].Reset(); + } + } + + bool doParse(const char* str_beg, size_t length); + TState::EParsed ParseImpl(); + }; } diff --git a/library/cpp/uri/qargs.cpp b/library/cpp/uri/qargs.cpp index 7dd18e7a16..23058f8102 100644 --- a/library/cpp/uri/qargs.cpp +++ b/library/cpp/uri/qargs.cpp @@ -2,32 +2,32 @@ #include <string> namespace NUri { - namespace NOnStackArgsList { - struct TQArgNode { - TQArgNode* Prev; - TQArgNode* Next; - - TStringBuf Name; - TStringBuf Value; - TStringBuf All; - }; - - TQArgNode MakeArg(TQArgNode* prev) { - return {prev, 0, {}, {}, {}}; - } - - const char* SkipDelimiter(const char* str, const char* end) { - while (str != end) - if (*str == '&') - ++str; - else - break; - return str; - } - - /// return next pos or 0 if error - const char* ExtractArgData(const char* pos, const char* end, TQArgNode* arg) { - const char* nameStart = pos; + namespace NOnStackArgsList { + struct TQArgNode { + TQArgNode* Prev; + TQArgNode* Next; + + TStringBuf Name; + TStringBuf Value; + TStringBuf All; + }; + + TQArgNode MakeArg(TQArgNode* prev) { + return {prev, 0, {}, {}, {}}; + } + + const char* SkipDelimiter(const char* str, const char* end) { + while (str != end) + if (*str == '&') + ++str; + else + break; + return str; + } + + /// return next pos or 0 if error + const char* ExtractArgData(const char* pos, const char* end, TQArgNode* arg) { + const char* nameStart = pos; const char* nextArg = strchr(pos, '&'); const char* valueStart = strchr(pos, '='); if (valueStart && nextArg && valueStart < nextArg) // a=1& or a=& @@ -57,223 +57,223 @@ namespace NUri { arg->All = arg->Name; return end; } - } - - // arg can be null - TQArgNode* GetHead(TQArgNode* arg) { - TQArgNode* prev = arg; - while (prev) { - arg = prev; - prev = prev->Prev; - } - return arg; - } - - // arg can be null - TQArgNode* GetLast(TQArgNode* arg) { - TQArgNode* next = arg; - while (next) { - arg = next; - next = arg->Next; - } - return arg; - } - - int CompareName(const TQArgNode* l, const TQArgNode* r) { - return l->Name.compare(r->Name); - } - - TQArgNode* Move(TQArgNode* before, TQArgNode* node) { - TQArgNode* tn = node->Next; - TQArgNode* tp = node->Prev; - - node->Prev = before->Prev; - if (node->Prev) - node->Prev->Next = node; - - node->Next = before; - before->Prev = node; - - if (tn) - tn->Prev = tp; - if (tp) - tp->Next = tn; - - return node; - } - - // return new head - TQArgNode* QSortByName(TQArgNode* iter, TQArgNode* last) { - if (iter == last) - return iter; - if (iter->Next == last) { - int c = CompareName(iter, last); - return c <= 0 ? iter : Move(iter, last); - } else { - TQArgNode* pivot = iter; - iter = iter->Next; - TQArgNode* head = 0; - TQArgNode* tail = 0; - TQArgNode* tailPartitionStart = pivot; - while (true) { - TQArgNode* next = iter->Next; - int c = CompareName(iter, pivot); - int sign = (0 < c) - (c < 0); - switch (sign) { - case -1: - head = head ? Move(head, iter) : Move(pivot, iter); - break; - - case 0: - pivot = Move(pivot, iter); - break; - - case 1: - tail = iter; - break; - } - - if (iter == last) - break; - iter = next; - } - - if (head) - head = QSortByName(head, pivot->Prev); - if (tail) - QSortByName(tailPartitionStart->Next, tail); - return head ? head : pivot; - } + } + + // arg can be null + TQArgNode* GetHead(TQArgNode* arg) { + TQArgNode* prev = arg; + while (prev) { + arg = prev; + prev = prev->Prev; + } + return arg; + } + + // arg can be null + TQArgNode* GetLast(TQArgNode* arg) { + TQArgNode* next = arg; + while (next) { + arg = next; + next = arg->Next; + } + return arg; + } + + int CompareName(const TQArgNode* l, const TQArgNode* r) { + return l->Name.compare(r->Name); + } + + TQArgNode* Move(TQArgNode* before, TQArgNode* node) { + TQArgNode* tn = node->Next; + TQArgNode* tp = node->Prev; + + node->Prev = before->Prev; + if (node->Prev) + node->Prev->Next = node; + + node->Next = before; + before->Prev = node; + + if (tn) + tn->Prev = tp; + if (tp) + tp->Next = tn; + + return node; + } + + // return new head + TQArgNode* QSortByName(TQArgNode* iter, TQArgNode* last) { + if (iter == last) + return iter; + if (iter->Next == last) { + int c = CompareName(iter, last); + return c <= 0 ? iter : Move(iter, last); + } else { + TQArgNode* pivot = iter; + iter = iter->Next; + TQArgNode* head = 0; + TQArgNode* tail = 0; + TQArgNode* tailPartitionStart = pivot; + while (true) { + TQArgNode* next = iter->Next; + int c = CompareName(iter, pivot); + int sign = (0 < c) - (c < 0); + switch (sign) { + case -1: + head = head ? Move(head, iter) : Move(pivot, iter); + break; + + case 0: + pivot = Move(pivot, iter); + break; + + case 1: + tail = iter; + break; + } + + if (iter == last) + break; + iter = next; + } + + if (head) + head = QSortByName(head, pivot->Prev); + if (tail) + QSortByName(tailPartitionStart->Next, tail); + return head ? head : pivot; + } } } - using namespace NOnStackArgsList; - - class TQueryArgProcessing::Pipeline { - public: - Pipeline(TQueryArgProcessing& parent, TUri& subject) - : Parent(parent) - , Subject(subject) - , ArgsCount(0) - , IsDirty(false) - { - } - - TQueryArg::EProcessed Process() { - const TStringBuf& query = Subject.GetField(NUri::TField::FieldQuery); - if (query.empty()) - return ProcessEmpty(); - - const char* start = query.data(); - return Parse(start, start + query.length(), 0); - } - - TQueryArg::EProcessed ProcessEmpty() { - if (Parent.Flags & TQueryArg::FeatureRemoveEmptyQuery) - Subject.FldClr(NUri::TField::FieldQuery); - - return TQueryArg::ProcessedOK; - } - - TQueryArg::EProcessed Parse(const char* str, const char* end, TQArgNode* prev) { - str = SkipDelimiter(str, end); - - if (str == end) { - TQArgNode* head = GetHead(prev); - TQArgNode* last = GetLast(prev); - return FinalizeParsing(head, last); - } else { - TQArgNode current = MakeArg(prev); - const char* next = ExtractArgData(str, end, ¤t); - if (!next) - return TQueryArg::ProcessedMalformed; - - TQArgNode* tail = ApplyFilter(prev, ¤t); - - if (++ArgsCount > MaxCount) - return TQueryArg::ProcessedTooMany; - - return Parse(next, end, tail); - } - } - - TQArgNode* ApplyFilter(TQArgNode* prev, TQArgNode* current) { - if (Parent.Flags & TQueryArg::FeatureFilter) { - TQueryArg arg = {current->Name, current->Value}; - if (!Parent.Filter(arg, Parent.FilterData)) { - IsDirty = true; - return prev; - } - } - - if (prev) - prev->Next = current; - return current; + using namespace NOnStackArgsList; + + class TQueryArgProcessing::Pipeline { + public: + Pipeline(TQueryArgProcessing& parent, TUri& subject) + : Parent(parent) + , Subject(subject) + , ArgsCount(0) + , IsDirty(false) + { } - TQueryArg::EProcessed FinalizeParsing(TQArgNode* head, TQArgNode* last) { - if (Parent.Flags & TQueryArg::FeatureSortByName) { - head = QSortByName(head, last); - IsDirty = true; - } - - if (!IsDirty) - return TQueryArg::ProcessedOK; - - bool dirty = Render(head); - - bool rewrite = Parent.Flags & TQueryArg::FeatureRewriteDirty; - if (dirty && rewrite) - Subject.Rewrite(); - return (!dirty || rewrite) ? TQueryArg::ProcessedOK : TQueryArg::ProcessedDirty; - } - - bool Render(TQArgNode* head) { - std::string& result = Parent.Buffer; - result.clear(); - result.reserve(Subject.GetField(NUri::TField::FieldQuery).length()); - bool first = true; - while (head) { - if (first) - first = false; - else - result.append("&"); - - result.append(head->All); - head = head->Next; - } - - if (result.empty()) - return RenderEmpty(); - else - return Subject.FldMemSet(NUri::TField::FieldQuery, result); - } - - bool RenderEmpty() { - if (Parent.Flags & TQueryArg::FeatureRemoveEmptyQuery) - Subject.FldClr(NUri::TField::FieldQuery); - return false; - } - - private: - TQueryArgProcessing& Parent; - TUri& Subject; - - unsigned ArgsCount; - bool IsDirty; - - static const unsigned MaxCount = 100; - }; - - TQueryArgProcessing::TQueryArgProcessing(ui32 flags, TQueryArgFilter filter, void* filterData) - : Flags(flags) - , Filter(filter) - , FilterData(filterData) - { - } - - TQueryArg::EProcessed TQueryArgProcessing::Process(TUri& uri) { - Pipeline pipeline(*this, uri); - return pipeline.Process(); - } + TQueryArg::EProcessed Process() { + const TStringBuf& query = Subject.GetField(NUri::TField::FieldQuery); + if (query.empty()) + return ProcessEmpty(); + + const char* start = query.data(); + return Parse(start, start + query.length(), 0); + } + + TQueryArg::EProcessed ProcessEmpty() { + if (Parent.Flags & TQueryArg::FeatureRemoveEmptyQuery) + Subject.FldClr(NUri::TField::FieldQuery); + + return TQueryArg::ProcessedOK; + } + + TQueryArg::EProcessed Parse(const char* str, const char* end, TQArgNode* prev) { + str = SkipDelimiter(str, end); + + if (str == end) { + TQArgNode* head = GetHead(prev); + TQArgNode* last = GetLast(prev); + return FinalizeParsing(head, last); + } else { + TQArgNode current = MakeArg(prev); + const char* next = ExtractArgData(str, end, ¤t); + if (!next) + return TQueryArg::ProcessedMalformed; + + TQArgNode* tail = ApplyFilter(prev, ¤t); + + if (++ArgsCount > MaxCount) + return TQueryArg::ProcessedTooMany; + + return Parse(next, end, tail); + } + } + + TQArgNode* ApplyFilter(TQArgNode* prev, TQArgNode* current) { + if (Parent.Flags & TQueryArg::FeatureFilter) { + TQueryArg arg = {current->Name, current->Value}; + if (!Parent.Filter(arg, Parent.FilterData)) { + IsDirty = true; + return prev; + } + } + + if (prev) + prev->Next = current; + return current; + } + + TQueryArg::EProcessed FinalizeParsing(TQArgNode* head, TQArgNode* last) { + if (Parent.Flags & TQueryArg::FeatureSortByName) { + head = QSortByName(head, last); + IsDirty = true; + } + + if (!IsDirty) + return TQueryArg::ProcessedOK; + + bool dirty = Render(head); + + bool rewrite = Parent.Flags & TQueryArg::FeatureRewriteDirty; + if (dirty && rewrite) + Subject.Rewrite(); + return (!dirty || rewrite) ? TQueryArg::ProcessedOK : TQueryArg::ProcessedDirty; + } + + bool Render(TQArgNode* head) { + std::string& result = Parent.Buffer; + result.clear(); + result.reserve(Subject.GetField(NUri::TField::FieldQuery).length()); + bool first = true; + while (head) { + if (first) + first = false; + else + result.append("&"); + + result.append(head->All); + head = head->Next; + } + + if (result.empty()) + return RenderEmpty(); + else + return Subject.FldMemSet(NUri::TField::FieldQuery, result); + } + + bool RenderEmpty() { + if (Parent.Flags & TQueryArg::FeatureRemoveEmptyQuery) + Subject.FldClr(NUri::TField::FieldQuery); + return false; + } + + private: + TQueryArgProcessing& Parent; + TUri& Subject; + + unsigned ArgsCount; + bool IsDirty; + + static const unsigned MaxCount = 100; + }; + + TQueryArgProcessing::TQueryArgProcessing(ui32 flags, TQueryArgFilter filter, void* filterData) + : Flags(flags) + , Filter(filter) + , FilterData(filterData) + { + } + + TQueryArg::EProcessed TQueryArgProcessing::Process(TUri& uri) { + Pipeline pipeline(*this, uri); + return pipeline.Process(); + } } diff --git a/library/cpp/uri/qargs.h b/library/cpp/uri/qargs.h index 5a22330549..fcba7cbd0c 100644 --- a/library/cpp/uri/qargs.h +++ b/library/cpp/uri/qargs.h @@ -5,18 +5,18 @@ #include <string> namespace NUri { - class TQueryArgProcessing { - public: - TQueryArgProcessing(ui32 flags, TQueryArgFilter filter = 0, void* filterData = 0); + class TQueryArgProcessing { + public: + TQueryArgProcessing(ui32 flags, TQueryArgFilter filter = 0, void* filterData = 0); - TQueryArg::EProcessed Process(TUri& uri); + TQueryArg::EProcessed Process(TUri& uri); - private: - ui32 Flags; - TQueryArgFilter Filter; - void* FilterData; + private: + ui32 Flags; + TQueryArgFilter Filter; + void* FilterData; - class Pipeline; - std::string Buffer; - }; + class Pipeline; + std::string Buffer; + }; } diff --git a/library/cpp/uri/uri-ru_ut.cpp b/library/cpp/uri/uri-ru_ut.cpp index a7306cbc64..ec35a164d2 100644 --- a/library/cpp/uri/uri-ru_ut.cpp +++ b/library/cpp/uri/uri-ru_ut.cpp @@ -1,7 +1,7 @@ #include "uri_ut.h" #include <library/cpp/charset/recyr.hh> #include <library/cpp/html/entity/htmlentity.h> -#include <util/system/maxlen.h> +#include <util/system/maxlen.h> namespace NUri { namespace { @@ -15,137 +15,137 @@ namespace NUri { Y_UNIT_TEST_SUITE(URLTestRU) { Y_UNIT_TEST(test_httpURL2) { - TUri url; - UNIT_ASSERT_VALUES_EQUAL(url.Parse("g:h"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("http:g"), TState::ParsedBadFormat); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("/../g"), TState::ParsedBadPath); - const char* const UpCaseUrl = "http://www.TEST.Ru:80/InDex.html"; - UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.TEST.Ru/InDex.html"); - UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl, TFeature::FeaturesDefault | TFeature::FeatureToLower), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.test.ru/InDex.html"); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme), "http:"); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme | TField::FlagHost), "http://www.test.ru"); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost), "www.test.ru"); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost | TField::FlagPath), "www.test.ru/InDex.html"); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagQuery), ""); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.TEST.Ru:90/InDex.html"), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostPort | TField::FlagPath), "www.TEST.Ru:90/InDex.html"); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK); - UNIT_ASSERT(!url.IsValidAbs()); - UNIT_ASSERT(url.IsNull(TField::FlagHost)); - UNIT_ASSERT(!url.IsNull(TField::FlagPath)); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagPath), "www.ya.ru/index.html"); + TUri url; + UNIT_ASSERT_VALUES_EQUAL(url.Parse("g:h"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("http:g"), TState::ParsedBadFormat); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("/../g"), TState::ParsedBadPath); + const char* const UpCaseUrl = "http://www.TEST.Ru:80/InDex.html"; + UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.TEST.Ru/InDex.html"); + UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl, TFeature::FeaturesDefault | TFeature::FeatureToLower), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.test.ru/InDex.html"); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme), "http:"); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme | TField::FlagHost), "http://www.test.ru"); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost), "www.test.ru"); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost | TField::FlagPath), "www.test.ru/InDex.html"); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagQuery), ""); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.TEST.Ru:90/InDex.html"), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostPort | TField::FlagPath), "www.TEST.Ru:90/InDex.html"); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK); + UNIT_ASSERT(!url.IsValidAbs()); + UNIT_ASSERT(url.IsNull(TField::FlagHost)); + UNIT_ASSERT(!url.IsNull(TField::FlagPath)); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagPath), "www.ya.ru/index.html"); UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10")), TState::ParsedOK); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10")); UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"), - TFeature::FeaturesDefault | TFeature::FeatureEncodeExtendedASCII), - TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), + TFeature::FeaturesDefault | TFeature::FeatureEncodeExtendedASCII), + TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/%D4%C5%D3%D4\\'\".html?%D4%C5%D3%D4\\'\"=%D4%C5%D3%D4+\\'\"%10")); UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"), - TFeature::FeaturesDefault | TFeature::FeatureEncodeForSQL), - TState::ParsedOK); + TFeature::FeaturesDefault | TFeature::FeatureEncodeForSQL), + TState::ParsedOK); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ%5C%27%22.html?ФЕУФ%5C%27%22=ФЕУФ+%5C%27%22%10")); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("q/%33%26%13%2f%2b%30%20", - TFeature::FeaturesDefault | TFeature::FeatureDecodeStandard), - TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "q/3%26%13/%2B0%20"); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("q/%33%26%13%2f%2b%30%20", + TFeature::FeaturesDefault | TFeature::FeatureDecodeStandard), + TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "q/3%26%13/%2B0%20"); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.prime-tass.ru/news/0/{656F5BAE-6677-4762-9BED-9E3B77E72055}.uif"), - TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path"), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path", TFeature::FeaturesRobot), TState::ParsedOK); - } + UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.prime-tass.ru/news/0/{656F5BAE-6677-4762-9BED-9E3B77E72055}.uif"), + TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path"), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path", TFeature::FeaturesRobot), TState::ParsedOK); + } const TString links[] = { - "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240", "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240", - "./viewtopic.php?p=74&sid=6#p74", "./viewtopic.php?p=74&sid=6#p74", - "viewtopic.php?p=9313&sid=8#9313", "viewtopic.php?p=9313&sid=8#9313", - "profile.php?mode=viewprofile&u=-1#drafts&sid=a6e5989cee27adb5996bfff044af04ca", "profile.php?mode=viewprofile&u=-1#drafts&sid=a6e5989cee27adb5996bfff044af04ca", + "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240", "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240", + "./viewtopic.php?p=74&sid=6#p74", "./viewtopic.php?p=74&sid=6#p74", + "viewtopic.php?p=9313&sid=8#9313", "viewtopic.php?p=9313&sid=8#9313", + "profile.php?mode=viewprofile&u=-1#drafts&sid=a6e5989cee27adb5996bfff044af04ca", "profile.php?mode=viewprofile&u=-1#drafts&sid=a6e5989cee27adb5996bfff044af04ca", - "images\nil.jpg", "images%0Ail.jpg", - "http://caedebaturque.termez.su\r\n/?article=218", "http://caedebaturque.termez.su%0D%0A/?article=218", + "images\nil.jpg", "images%0Ail.jpg", + "http://caedebaturque.termez.su\r\n/?article=218", "http://caedebaturque.termez.su%0D%0A/?article=218", AsKoi8("javascript:window.external.AddFavorite(\'http://www.humor.look.ru/\',\'Злобные Деды Морозы!!!\')"), "javascript:window.external.AddFavorite(\'http://www.humor.look.ru/\',\'%FA%CC%CF%C2%CE%D9%C5%20%E4%C5%C4%D9%20%ED%CF%D2%CF%DA%D9!!!\')", - "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&showresults=posts&sid=8", "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&showresults=posts&sid=8", + "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&showresults=posts&sid=8", "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&showresults=posts&sid=8", AsWin1251("/Search/author/?q=Штрибель Х.В."), "/Search/author/?q=%D8%F2%F0%E8%E1%E5%EB%FC%20%D5.%C2.", AsWin1251("javascript:ins(\'ГОРШОК\')"), "javascript:ins(\'%C3%CE%D0%D8%CE%CA\')", AsWin1251("?l=я"), "?l=%FF", AsWin1251("content.php?id=3392&theme=Цена"), "content.php?id=3392&theme=%D6%E5%ED%E0", - "/a-mp3/stype-1/?search=А", "/a-mp3/stype-1/?search=%D0%90", - "/a-mp3/stype-1/?search=Б", "/a-mp3/stype-1/?search=%D0%91", - "/a-mp3/stype-1/?search=В", "/a-mp3/stype-1/?search=%D0%92", - "/a-mp3/stype-1/?search=Г", "/a-mp3/stype-1/?search=%D0%93", - "/a-mp3/stype-1/?search=Д", "/a-mp3/stype-1/?search=%D0%94", - "/a-mp3/stype-1/?search=Е", "/a-mp3/stype-1/?search=%D0%95", - "/a-mp3/stype-1/?search=Ж", "/a-mp3/stype-1/?search=%D0%96", - "/a-mp3/stype-1/?search=З", "/a-mp3/stype-1/?search=%D0%97", - // %98 is not defined in CP1251 so don't put it here explicitly + "/a-mp3/stype-1/?search=А", "/a-mp3/stype-1/?search=%D0%90", + "/a-mp3/stype-1/?search=Б", "/a-mp3/stype-1/?search=%D0%91", + "/a-mp3/stype-1/?search=В", "/a-mp3/stype-1/?search=%D0%92", + "/a-mp3/stype-1/?search=Г", "/a-mp3/stype-1/?search=%D0%93", + "/a-mp3/stype-1/?search=Д", "/a-mp3/stype-1/?search=%D0%94", + "/a-mp3/stype-1/?search=Е", "/a-mp3/stype-1/?search=%D0%95", + "/a-mp3/stype-1/?search=Ж", "/a-mp3/stype-1/?search=%D0%96", + "/a-mp3/stype-1/?search=З", "/a-mp3/stype-1/?search=%D0%97", + // %98 is not defined in CP1251 so don't put it here explicitly "/a-mp3/stype-1/?search=\xD0\x98", "/a-mp3/stype-1/?search=%D0%98", - "/a-mp3/stype-1/?search=Й", "/a-mp3/stype-1/?search=%D0%99", - "/a-mp3/stype-1/?search=К", "/a-mp3/stype-1/?search=%D0%9A", - "/a-mp3/stype-1/?search=Л", "/a-mp3/stype-1/?search=%D0%9B", - "/a-mp3/stype-1/?search=М", "/a-mp3/stype-1/?search=%D0%9C", - "/a-mp3/stype-1/?search=Н", "/a-mp3/stype-1/?search=%D0%9D", - "/a-mp3/stype-1/?search=О", "/a-mp3/stype-1/?search=%D0%9E", - "/a-mp3/stype-1/?search=П", "/a-mp3/stype-1/?search=%D0%9F", + "/a-mp3/stype-1/?search=Й", "/a-mp3/stype-1/?search=%D0%99", + "/a-mp3/stype-1/?search=К", "/a-mp3/stype-1/?search=%D0%9A", + "/a-mp3/stype-1/?search=Л", "/a-mp3/stype-1/?search=%D0%9B", + "/a-mp3/stype-1/?search=М", "/a-mp3/stype-1/?search=%D0%9C", + "/a-mp3/stype-1/?search=Н", "/a-mp3/stype-1/?search=%D0%9D", + "/a-mp3/stype-1/?search=О", "/a-mp3/stype-1/?search=%D0%9E", + "/a-mp3/stype-1/?search=П", "/a-mp3/stype-1/?search=%D0%9F", "/a-mp3/stype-1/?search=\xD0", "/a-mp3/stype-1/?search=%D0", - "/a-mp3/stype-1/?search=С", "/a-mp3/stype-1/?search=%D0%A1", - "/a-mp3/stype-1/?search=Т", "/a-mp3/stype-1/?search=%D0%A2", - "/a-mp3/stype-1/?search=У", "/a-mp3/stype-1/?search=%D0%A3", - "/a-mp3/stype-1/?search=Ф", "/a-mp3/stype-1/?search=%D0%A4", - "/a-mp3/stype-1/?search=Х", "/a-mp3/stype-1/?search=%D0%A5", - "/a-mp3/stype-1/?search=Ц", "/a-mp3/stype-1/?search=%D0%A6", - "/a-mp3/stype-1/?search=Ч", "/a-mp3/stype-1/?search=%D0%A7", - "/a-mp3/stype-1/?search=Ш", "/a-mp3/stype-1/?search=%D0%A8", - "/a-mp3/stype-1/?search=Щ", "/a-mp3/stype-1/?search=%D0%A9", - "/a-mp3/stype-1/?search=Ы", "/a-mp3/stype-1/?search=%D0%AB", - "/a-mp3/stype-1/?search=Э", "/a-mp3/stype-1/?search=%D0%AD", - "/a-mp3/stype-1/?search=Ю", "/a-mp3/stype-1/?search=%D0%AE", - "/a-mp3/stype-1/?search=Я", "/a-mp3/stype-1/?search=%D0%AF", - - "javascript:emoticon(\":'(\")", "javascript:emoticon(\":\'(\")", - "javascript:emoticon(\'>:o\')", "javascript:emoticon(\'>:o\')", - "javascript:emoticon(\']:->\')", "javascript:emoticon(\']:->\')", - "javascript:emoticon(\':-!\')", "javascript:emoticon(\':-!\')", - "javascript:emoticon(\'@}->--\')", "javascript:emoticon(\'@}->--\')", - "http://www.is-ufa.ru/price2/price_IS.rar", "http://www.is-ufa.ru/price2/price_IS.rar", - "mailto:info@etem.de", "mailto:info@etem.de", - ""http://www.fubix.ru"", "\"http://www.fubix.ru\"", + "/a-mp3/stype-1/?search=С", "/a-mp3/stype-1/?search=%D0%A1", + "/a-mp3/stype-1/?search=Т", "/a-mp3/stype-1/?search=%D0%A2", + "/a-mp3/stype-1/?search=У", "/a-mp3/stype-1/?search=%D0%A3", + "/a-mp3/stype-1/?search=Ф", "/a-mp3/stype-1/?search=%D0%A4", + "/a-mp3/stype-1/?search=Х", "/a-mp3/stype-1/?search=%D0%A5", + "/a-mp3/stype-1/?search=Ц", "/a-mp3/stype-1/?search=%D0%A6", + "/a-mp3/stype-1/?search=Ч", "/a-mp3/stype-1/?search=%D0%A7", + "/a-mp3/stype-1/?search=Ш", "/a-mp3/stype-1/?search=%D0%A8", + "/a-mp3/stype-1/?search=Щ", "/a-mp3/stype-1/?search=%D0%A9", + "/a-mp3/stype-1/?search=Ы", "/a-mp3/stype-1/?search=%D0%AB", + "/a-mp3/stype-1/?search=Э", "/a-mp3/stype-1/?search=%D0%AD", + "/a-mp3/stype-1/?search=Ю", "/a-mp3/stype-1/?search=%D0%AE", + "/a-mp3/stype-1/?search=Я", "/a-mp3/stype-1/?search=%D0%AF", + + "javascript:emoticon(\":'(\")", "javascript:emoticon(\":\'(\")", + "javascript:emoticon(\'>:o\')", "javascript:emoticon(\'>:o\')", + "javascript:emoticon(\']:->\')", "javascript:emoticon(\']:->\')", + "javascript:emoticon(\':-!\')", "javascript:emoticon(\':-!\')", + "javascript:emoticon(\'@}->--\')", "javascript:emoticon(\'@}->--\')", + "http://www.is-ufa.ru/price2/price_IS.rar", "http://www.is-ufa.ru/price2/price_IS.rar", + "mailto:info@etem.de", "mailto:info@etem.de", + ""http://www.fubix.ru"", "\"http://www.fubix.ru\"", AsWin1251("mailto:kampa@ukr.net?subject=Арабский язык"), "mailto:kampa@ukr.net?subject=%C0%F0%E0%E1%F1%EA%E8%E9%20%FF%E7%FB%EA", {}}; Y_UNIT_TEST(testHtLinkDecode) { - char decodedlink[URL_MAXLEN + 10]; - for (int i = 0; links[i]; i += 2) { + char decodedlink[URL_MAXLEN + 10]; + for (int i = 0; links[i]; i += 2) { UNIT_ASSERT(HtLinkDecode(links[i].c_str(), decodedlink, sizeof(decodedlink))); UNIT_ASSERT_VALUES_EQUAL(decodedlink, links[i + 1]); - } - } + } + } Y_UNIT_TEST(testRuIDNA) { - { + { #define DEC "\xD7\xE5\xF0\xE5\xEf\xEE\xE2\xE5\xF6.\xF0\xF4" /* "Череповец.рф" in Windows-1251 */ #define ENC "%D7%E5%F0%E5%EF%EE%E2%E5%F6.%F0%F4" // punycode corresponds to lowercase #define PNC "xn--b1afab7bff7cb.xn--p1ai" - TTest test = { - "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; - TUri url; - URL_TEST_ENC(url, test, CODES_WIN); - UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); + TTest test = { + "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; + TUri url; + URL_TEST_ENC(url, test, CODES_WIN); + UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); #undef PNC #undef DEC #undef ENC - } - } + } + } // Regression test for SEARCH-11283 Y_UNIT_TEST(RegressionTest11283) { diff --git a/library/cpp/uri/uri.cpp b/library/cpp/uri/uri.cpp index 55cfe57e28..56a9a4e5ef 100644 --- a/library/cpp/uri/uri.cpp +++ b/library/cpp/uri/uri.cpp @@ -1,621 +1,621 @@ #include "uri.h" #include "parse.h" - + #include <util/string/cast.h> #include <util/string/util.h> #include <util/system/yassert.h> - + namespace NUri { - TState::EParsed TUri::CheckHost(const TStringBuf& host) { + TState::EParsed TUri::CheckHost(const TStringBuf& host) { if (host.empty()) - return ParsedOK; - - unsigned domainLevel = 0; - unsigned domainLevelOfUnderscore = 0; - - bool isAlnum = false; - bool startLabel = true; - for (size_t i = 0; i != host.length(); ++i) { - const char ch = host[i]; - - if ('.' == ch) { // label separator - if (!isAlnum || startLabel) // previous label must end in alnum - return ParsedBadHost; - startLabel = true; - continue; - } - - isAlnum = isalnum((const unsigned char)ch); - - if (startLabel) { // label is starting - if (!isAlnum && '_' != ch) // new label must start with alnum or '_' - return ParsedBadHost; - startLabel = false; - ++domainLevel; - if (ch == '_') - domainLevelOfUnderscore = domainLevel; - continue; - } - - if (isAlnum || '-' == ch) - continue; - - if (ch == '_') { // non-standard case we allow for certain hosts + return ParsedOK; + + unsigned domainLevel = 0; + unsigned domainLevelOfUnderscore = 0; + + bool isAlnum = false; + bool startLabel = true; + for (size_t i = 0; i != host.length(); ++i) { + const char ch = host[i]; + + if ('.' == ch) { // label separator + if (!isAlnum || startLabel) // previous label must end in alnum + return ParsedBadHost; + startLabel = true; + continue; + } + + isAlnum = isalnum((const unsigned char)ch); + + if (startLabel) { // label is starting + if (!isAlnum && '_' != ch) // new label must start with alnum or '_' + return ParsedBadHost; + startLabel = false; + ++domainLevel; + if (ch == '_') + domainLevelOfUnderscore = domainLevel; + continue; + } + + if (isAlnum || '-' == ch) + continue; + + if (ch == '_') { // non-standard case we allow for certain hosts domainLevelOfUnderscore = domainLevel; - continue; - } - - return ParsedBadHost; + continue; + } + + return ParsedBadHost; } - if (0 < domainLevelOfUnderscore && domainLevel < 2 + domainLevelOfUnderscore) - return ParsedBadHost; - - return ParsedOK; - } - - /********************************************************/ - TUri::TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query, const TStringBuf& scheme, unsigned defaultPort) - : FieldsSet(0) - , Port(port) - , DefaultPort(0) - , Scheme(SchemeEmpty) - , FieldsDirty(0) - { + if (0 < domainLevelOfUnderscore && domainLevel < 2 + domainLevelOfUnderscore) + return ParsedBadHost; + + return ParsedOK; + } + + /********************************************************/ + TUri::TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query, const TStringBuf& scheme, unsigned defaultPort) + : FieldsSet(0) + , Port(port) + , DefaultPort(0) + , Scheme(SchemeEmpty) + , FieldsDirty(0) + { if (!scheme.empty()) { if (SetSchemeImpl(TSchemeInfo::Get(scheme)).Str.empty()) - FldSet(FieldScheme, scheme); + FldSet(FieldScheme, scheme); } - if (0 < defaultPort) // override the scheme's default port - DefaultPort = static_cast<ui16>(defaultPort); + if (0 < defaultPort) // override the scheme's default port + DefaultPort = static_cast<ui16>(defaultPort); - char sport[6]; // enough for ui16 - if (0 != port) { - const size_t len = ToString(port, sport, sizeof(sport)); - FldSet(FieldPort, TStringBuf(sport, len)); - } + char sport[6]; // enough for ui16 + if (0 != port) { + const size_t len = ToString(port, sport, sizeof(sport)); + FldSet(FieldPort, TStringBuf(sport, len)); + } - FldTrySet(FieldHost, host); - FldTrySet(FieldPath, path); - FldTrySet(FieldQuery, query); + FldTrySet(FieldHost, host); + FldTrySet(FieldPath, path); + FldTrySet(FieldQuery, query); - Rewrite(); + Rewrite(); } - /********************************************************/ - bool TUri::FldSetImpl( - EField field, TStringBuf value, bool strconst, bool nocopy) { - if (!FldIsValid(field)) - return false; + /********************************************************/ + bool TUri::FldSetImpl( + EField field, TStringBuf value, bool strconst, bool nocopy) { + if (!FldIsValid(field)) + return false; - switch (field) { - case FieldScheme: + switch (field) { + case FieldScheme: if (!SetScheme(TSchemeInfo::Get(value)).Str.empty()) - return false; - break; + return false; + break; - case FieldPort: + case FieldPort: Port = value.empty() ? 0 : FromString<ui16>(value); - break; + break; - default: - break; - } + default: + break; + } - if (!value.IsInited()) { - FldClr(field); - return false; - } + if (!value.IsInited()) { + FldClr(field); + return false; + } - if (strconst) { // string constants don't need to be saved in the buffer - FldMarkClean(field); - FldSetNoDirty(field, value); + if (strconst) { // string constants don't need to be saved in the buffer + FldMarkClean(field); + FldSetNoDirty(field, value); return false; - } + } - if (nocopy) { - FldSet(field, value); - return true; - } + if (nocopy) { + FldSet(field, value); + return true; + } - return FldTryCpy(field, value); + return FldTryCpy(field, value); } - /********************************************************/ - bool TUri::FldTryCpy(EField field, const TStringBuf& value) { - if (!FldIsDirty(field)) { - do { + /********************************************************/ + bool TUri::FldTryCpy(EField field, const TStringBuf& value) { + if (!FldIsDirty(field)) { + do { if (!FldIsSet(field)) - break; - - TStringBuf& fld = Fields[field]; - if (fld.length() < value.length()) - break; - - char* oldV = (char*)fld.data(); - if (!IsInBuffer(oldV)) - break; - - memcpy(oldV, value.data(), value.length()); - oldV[value.length()] = 0; - fld.Trunc(value.length()); - return false; - } while (false); - - FldMarkDirty(field); - } - + break; + + TStringBuf& fld = Fields[field]; + if (fld.length() < value.length()) + break; + + char* oldV = (char*)fld.data(); + if (!IsInBuffer(oldV)) + break; + + memcpy(oldV, value.data(), value.length()); + oldV[value.length()] = 0; + fld.Trunc(value.length()); + return false; + } while (false); + + FldMarkDirty(field); + } + FldSetNoDirty(field, value); return true; } - /********************************************************/ - void TUri::RewriteImpl() { - size_t len = 0; - for (int i = 0; i < FieldAllMAX; ++i) { - const EField fld = EField(i); - if (FldIsSet(fld)) - len += 1 + Fields[fld].length(); - } + /********************************************************/ + void TUri::RewriteImpl() { + size_t len = 0; + for (int i = 0; i < FieldAllMAX; ++i) { + const EField fld = EField(i); + if (FldIsSet(fld)) + len += 1 + Fields[fld].length(); + } - if (!len) + if (!len) Buffer.Clear(); - else { + else { TBuffer newbuf; newbuf.Resize(len); TMemoryWriteBuffer out(newbuf.data(), newbuf.size()); - for (int i = 0; i < FieldAllMAX; ++i) { - const EField fld = EField(i); - if (!FldIsSet(fld)) - continue; - - const char* beg = out.Buf(); - const TStringBuf& val = Fields[fld]; - out << val; - FldSetNoDirty(fld, TStringBuf(beg, val.length())); + for (int i = 0; i < FieldAllMAX; ++i) { + const EField fld = EField(i); + if (!FldIsSet(fld)) + continue; + + const char* beg = out.Buf(); + const TStringBuf& val = Fields[fld]; + out << val; + FldSetNoDirty(fld, TStringBuf(beg, val.length())); out << '\0'; - } + } Buffer = std::move(newbuf); } - CheckMissingFields(); + CheckMissingFields(); - FieldsDirty = 0; + FieldsDirty = 0; } - void TUri::CheckMissingFields() { - // if host is set but path is not... - if (FldSetCmp(FlagPath | FlagHost, FlagHost)) - // ... and the scheme requires a path... - if (GetSchemeInfo().FldReq & FlagPath) - // ... set path + void TUri::CheckMissingFields() { + // if host is set but path is not... + if (FldSetCmp(FlagPath | FlagHost, FlagHost)) + // ... and the scheme requires a path... + if (GetSchemeInfo().FldReq & FlagPath) + // ... set path FldSetNoDirty(FieldPath, TStringBuf("/")); } - /********************************************************/ - void TUri::Merge(const TUri& base, int correctAbs) { - if (base.Scheme == SchemeUnknown) - return; + /********************************************************/ + void TUri::Merge(const TUri& base, int correctAbs) { + if (base.Scheme == SchemeUnknown) + return; - if (!base.IsValidGlobal()) - return; + if (!base.IsValidGlobal()) + return; - const TStringBuf& selfscheme = GetField(FieldScheme); - // basescheme is present since IsValidGlobal() succeeded - const TStringBuf& basescheme = base.GetField(FieldScheme); - const bool noscheme = !selfscheme.IsInited(); - if (!noscheme && !EqualNoCase(selfscheme, basescheme)) - return; + const TStringBuf& selfscheme = GetField(FieldScheme); + // basescheme is present since IsValidGlobal() succeeded + const TStringBuf& basescheme = base.GetField(FieldScheme); + const bool noscheme = !selfscheme.IsInited(); + if (!noscheme && !EqualNoCase(selfscheme, basescheme)) + return; - const ui32 cleanFields = ~FieldsDirty; - do { + const ui32 cleanFields = ~FieldsDirty; + do { static constexpr TStringBuf rootPath = "/"; - if (noscheme) { + if (noscheme) { if (!basescheme.empty()) { - FldSetNoDirty(FieldScheme, basescheme); - // check if it is canonical - if (basescheme.data() != base.GetSchemeInfo().Str.data()) - FldMarkDirty(FieldScheme); - } - Scheme = base.Scheme; - DefaultPort = base.DefaultPort; - } - - if (!IsNull(FlagHost)) - break; // no merge - - FldTrySet(FieldHost, base); - FldChkSet(FieldPort, base); - Port = base.Port; - - if (noscheme && IsNull(FlagQuery) && IsNull(FlagPath)) - FldTrySet(FieldQuery, base); - - if (IsNull(FlagAuth) && !base.IsNull(FlagAuth)) { - FldChkSet(FieldUser, base); - FldChkSet(FieldPass, base); + FldSetNoDirty(FieldScheme, basescheme); + // check if it is canonical + if (basescheme.data() != base.GetSchemeInfo().Str.data()) + FldMarkDirty(FieldScheme); + } + Scheme = base.Scheme; + DefaultPort = base.DefaultPort; + } + + if (!IsNull(FlagHost)) + break; // no merge + + FldTrySet(FieldHost, base); + FldChkSet(FieldPort, base); + Port = base.Port; + + if (noscheme && IsNull(FlagQuery) && IsNull(FlagPath)) + FldTrySet(FieldQuery, base); + + if (IsNull(FlagAuth) && !base.IsNull(FlagAuth)) { + FldChkSet(FieldUser, base); + FldChkSet(FieldPass, base); } - if (IsValidAbs()) - break; + if (IsValidAbs()) + break; - TStringBuf p0 = base.GetField(FieldPath); - if (!p0.IsInited()) - p0 = rootPath; + TStringBuf p0 = base.GetField(FieldPath); + if (!p0.IsInited()) + p0 = rootPath; - TStringBuf p1 = GetField(FieldPath); - if (!p1.IsInited()) { - if (p0.data() != rootPath.data()) - FldSet(FieldPath, p0); - else - FldSetNoDirty(FieldPath, rootPath); - break; - } - if (p1 && '/' == p1[0]) - p1.Skip(1); // p0 will have one + TStringBuf p1 = GetField(FieldPath); + if (!p1.IsInited()) { + if (p0.data() != rootPath.data()) + FldSet(FieldPath, p0); + else + FldSetNoDirty(FieldPath, rootPath); + break; + } + if (p1 && '/' == p1[0]) + p1.Skip(1); // p0 will have one - bool pathop = true; + bool pathop = true; TTempBufOutput out(p0.length() + p1.length() + 4); - out << p0; - if ('/' != p0.back()) - out << "/../"; - else if (p1.empty() || '.' != p1[0]) - pathop = false; - out << p1; - - char* beg = out.Data(); - char* end = beg + out.Filled(); - if (pathop && !PathOperation(beg, end, correctAbs)) { - Clear(); - break; - } - - // Needs immediate forced rewrite because of TTempBuf - FldSetNoDirty(FieldPath, TStringBuf(beg, end)); - RewriteImpl(); - } while (false); - - CheckMissingFields(); - - // rewrite only if borrowed fields from base - if (cleanFields & FieldsDirty) - RewriteImpl(); + out << p0; + if ('/' != p0.back()) + out << "/../"; + else if (p1.empty() || '.' != p1[0]) + pathop = false; + out << p1; + + char* beg = out.Data(); + char* end = beg + out.Filled(); + if (pathop && !PathOperation(beg, end, correctAbs)) { + Clear(); + break; + } + + // Needs immediate forced rewrite because of TTempBuf + FldSetNoDirty(FieldPath, TStringBuf(beg, end)); + RewriteImpl(); + } while (false); + + CheckMissingFields(); + + // rewrite only if borrowed fields from base + if (cleanFields & FieldsDirty) + RewriteImpl(); } - /********************************************************/ - TUri::TLinkType TUri::Normalize(const TUri& base, - const TStringBuf& link, const TStringBuf& codebase, long careFlags, ECharset enc) { - // parse URL - if (ParsedOK != ParseImpl(link, careFlags, 0, SchemeEmpty, enc)) - return LinkIsBad; + /********************************************************/ + TUri::TLinkType TUri::Normalize(const TUri& base, + const TStringBuf& link, const TStringBuf& codebase, long careFlags, ECharset enc) { + // parse URL + if (ParsedOK != ParseImpl(link, careFlags, 0, SchemeEmpty, enc)) + return LinkIsBad; - const TStringBuf& host = GetHost(); + const TStringBuf& host = GetHost(); - // merge with base URL - // taken either from _BASE_ property or from optional argument + // merge with base URL + // taken either from _BASE_ property or from optional argument if (!codebase.empty()) { - // if optional code base given -- parse it - TUri codebaseUrl; - if (codebaseUrl.ParseImpl(codebase, careFlags, 0, SchemeEmpty, enc) != ParsedOK || !codebaseUrl.IsValidAbs()) - return LinkIsBad; - Merge(codebaseUrl); - } else { - // Base is already in this variable - // see SetProperty() for details - Merge(base); - } - - // check result: must be correct absolute URL - if (!IsValidAbs()) - return LinkBadAbs; + // if optional code base given -- parse it + TUri codebaseUrl; + if (codebaseUrl.ParseImpl(codebase, careFlags, 0, SchemeEmpty, enc) != ParsedOK || !codebaseUrl.IsValidAbs()) + return LinkIsBad; + Merge(codebaseUrl); + } else { + // Base is already in this variable + // see SetProperty() for details + Merge(base); + } + + // check result: must be correct absolute URL + if (!IsValidAbs()) + return LinkBadAbs; if (!host.empty()) { - // - we don't care about different ports for the same server - // - we don't care about win|www|koi|etc. preffixes for the same server - if (GetPort() != base.GetPort() || !EqualNoCase(host, base.GetHost())) - return LinkIsGlobal; - } + // - we don't care about different ports for the same server + // - we don't care about win|www|koi|etc. preffixes for the same server + if (GetPort() != base.GetPort() || !EqualNoCase(host, base.GetHost())) + return LinkIsGlobal; + } - // find out if it is link to itself then ignore it - if (!Compare(base, FlagPath | FlagQuery)) - return LinkIsFragment; + // find out if it is link to itself then ignore it + if (!Compare(base, FlagPath | FlagQuery)) + return LinkIsFragment; - return LinkIsLocal; + return LinkIsLocal; } - /********************************************************/ - - size_t TUri::PrintSize(ui32 flags) const { - size_t len = 10; - flags &= FieldsSet; // can't output what we don't have - if (flags & FlagHostAscii) - flags &= ~FlagHost; // don't want to print both of them - ui32 opt = 1; - for (int fld = 0; opt <= flags && fld < FieldAllMAX; ++fld, opt <<= 1) { - if (opt & flags) { - const TStringBuf& v = Fields[fld]; - if (v.IsInited()) { - if (opt & FlagAuth) - len += 3 * v.length() + 1; - else - len += v.length() + 1; - } + /********************************************************/ + + size_t TUri::PrintSize(ui32 flags) const { + size_t len = 10; + flags &= FieldsSet; // can't output what we don't have + if (flags & FlagHostAscii) + flags &= ~FlagHost; // don't want to print both of them + ui32 opt = 1; + for (int fld = 0; opt <= flags && fld < FieldAllMAX; ++fld, opt <<= 1) { + if (opt & flags) { + const TStringBuf& v = Fields[fld]; + if (v.IsInited()) { + if (opt & FlagAuth) + len += 3 * v.length() + 1; + else + len += v.length() + 1; + } } } - - return len; + + return len; } - IOutputStream& TUri::PrintImpl(IOutputStream& out, int flags) const { - TStringBuf v; + IOutputStream& TUri::PrintImpl(IOutputStream& out, int flags) const { + TStringBuf v; - const int wantFlags = flags; // save the original - flags &= FieldsSet; // can't print what we don't have - if (flags & FlagHostAscii) - flags |= FlagHost; // to make host checks simpler below + const int wantFlags = flags; // save the original + flags &= FieldsSet; // can't print what we don't have + if (flags & FlagHostAscii) + flags |= FlagHost; // to make host checks simpler below - if (flags & FlagScheme) { - v = Fields[FieldScheme]; + if (flags & FlagScheme) { + v = Fields[FieldScheme]; if (!v.empty()) - out << v << ':'; - } - - TStringBuf host; - if (flags & FlagHost) { - const EField fldhost = - flags & FlagHostAscii ? FieldHostAscii : FieldHost; - host = Fields[fldhost]; - } - - TStringBuf port; - if ((flags & FlagPort) && 0 != Port && Port != DefaultPort) - port = Fields[FieldPort]; - - if (host) { - if (wantFlags & FlagScheme) - out << "//"; - - if (flags & FlagAuth) { - if (flags & FlagUser) { - v = Fields[FieldUser]; + out << v << ':'; + } + + TStringBuf host; + if (flags & FlagHost) { + const EField fldhost = + flags & FlagHostAscii ? FieldHostAscii : FieldHost; + host = Fields[fldhost]; + } + + TStringBuf port; + if ((flags & FlagPort) && 0 != Port && Port != DefaultPort) + port = Fields[FieldPort]; + + if (host) { + if (wantFlags & FlagScheme) + out << "//"; + + if (flags & FlagAuth) { + if (flags & FlagUser) { + v = Fields[FieldUser]; if (!v.empty()) - TEncoder::EncodeNotAlnum(out, v); - } - - if (flags & FlagPass) { - v = Fields[FieldPass]; - if (v.IsInited()) { - out << ':'; - TEncoder::EncodeAll(out, v); - } - } - - out << '@'; + TEncoder::EncodeNotAlnum(out, v); + } + + if (flags & FlagPass) { + v = Fields[FieldPass]; + if (v.IsInited()) { + out << ':'; + TEncoder::EncodeAll(out, v); + } + } + + out << '@'; } - out << host; + out << host; - if (port) - out << ':'; + if (port) + out << ':'; } - if (port) - out << port; + if (port) + out << port; - if (flags & FlagPath) { - v = Fields[FieldPath]; - // for relative, empty path is not the same as missing + if (flags & FlagPath) { + v = Fields[FieldPath]; + // for relative, empty path is not the same as missing if (v.empty() && 0 == (flags & FlagHost)) v = TStringBuf("."); - out << v; - } - - if (flags & FlagQuery) { - v = Fields[FieldQuery]; - if (v.IsInited()) - out << '?' << v; - } - - if (flags & FlagFrag) { - v = Fields[FieldFrag]; - if (v.IsInited()) - out << '#' << v; - } - - return out; + out << v; + } + + if (flags & FlagQuery) { + v = Fields[FieldQuery]; + if (v.IsInited()) + out << '?' << v; + } + + if (flags & FlagFrag) { + v = Fields[FieldFrag]; + if (v.IsInited()) + out << '#' << v; + } + + return out; } - /********************************************************/ - int TUri::CompareField(EField fld, const TUri& url) const { - const TStringBuf& v0 = GetField(fld); - const TStringBuf& v1 = url.GetField(fld); - switch (fld) { - case FieldScheme: - case FieldHost: - return CompareNoCase(v0, v1); - default: - return v0.compare(v1); - } + /********************************************************/ + int TUri::CompareField(EField fld, const TUri& url) const { + const TStringBuf& v0 = GetField(fld); + const TStringBuf& v1 = url.GetField(fld); + switch (fld) { + case FieldScheme: + case FieldHost: + return CompareNoCase(v0, v1); + default: + return v0.compare(v1); + } } - /********************************************************/ - int TUri::Compare(const TUri& url, int flags) const { - // first compare fields with default values - if (flags & FlagPort) { - const int ret = GetPort() - url.GetPort(); - if (ret) - return ret; - flags &= ~FlagPort; - } - - // compare remaining sets of available fields - const int rtflags = flags & url.FieldsSet; - flags &= FieldsSet; - const int fldcmp = flags - rtflags; - if (fldcmp) - return fldcmp; - - // field sets are the same, compare the fields themselves - for (int i = 0; i < FieldAllMAX; ++i) { - const EField fld = EField(i); - if (flags & FldFlag(fld)) { - const int ret = CompareField(fld, url); - if (ret) - return ret; - } - } - - return 0; + /********************************************************/ + int TUri::Compare(const TUri& url, int flags) const { + // first compare fields with default values + if (flags & FlagPort) { + const int ret = GetPort() - url.GetPort(); + if (ret) + return ret; + flags &= ~FlagPort; + } + + // compare remaining sets of available fields + const int rtflags = flags & url.FieldsSet; + flags &= FieldsSet; + const int fldcmp = flags - rtflags; + if (fldcmp) + return fldcmp; + + // field sets are the same, compare the fields themselves + for (int i = 0; i < FieldAllMAX; ++i) { + const EField fld = EField(i); + if (flags & FldFlag(fld)) { + const int ret = CompareField(fld, url); + if (ret) + return ret; + } + } + + return 0; } - /********************************************************/ - bool TUri::PathOperation(char*& pathPtr, char*& pathEnd, int correctAbs) { - if (!pathPtr) - return false; - if (pathPtr == pathEnd) - return true; + /********************************************************/ + bool TUri::PathOperation(char*& pathPtr, char*& pathEnd, int correctAbs) { + if (!pathPtr) + return false; + if (pathPtr == pathEnd) + return true; - if ((pathEnd - pathPtr) >= 2 && *(pathEnd - 2) == '/' && *(pathEnd - 1) == '.') { - --pathEnd; + if ((pathEnd - pathPtr) >= 2 && *(pathEnd - 2) == '/' && *(pathEnd - 1) == '.') { + --pathEnd; } - char* p_wr = pathEnd; - int upCount = 0; - - char* p_prev = pathEnd; - Y_ASSERT(p_prev > pathPtr); - while (p_prev > pathPtr && *(p_prev - 1) == '/') - p_prev--; - - for (char* p_rd = p_prev; p_rd; p_rd = p_prev) { - Y_ASSERT(p_rd == pathEnd || p_rd[0] == '/'); - p_prev = nullptr; - - char* p = p_rd; - - if (p > pathPtr) { - for (p--; *p != '/'; p--) { - if (p == pathPtr) - break; - } - if (*p == '/') { - p_prev = p++; - if ((p_prev - pathPtr >= 6 && !strnicmp(p_prev - 6, "http://", 7)) || - (p_prev - pathPtr >= 7 && !strnicmp(p_prev - 7, "https://", 8))) { - --p_prev; - --p; - } else { - //skip multiple from head '/' - while (p_prev > pathPtr && *(p_prev - 1) == '/') - p_prev--; - } - } - } - - Y_ASSERT(p_prev == nullptr || p_prev[0] == '/'); - //and the first symbol !='/' after p_prev is p - - if (p == p_rd) { - //empty block: - if (p_prev) { //either tail: - Y_ASSERT(p_rd == p_wr && *(p - 1) == '/'); - --p_wr; - continue; - } else { //or head of abs path - *(--p_wr) = '/'; + char* p_wr = pathEnd; + int upCount = 0; + + char* p_prev = pathEnd; + Y_ASSERT(p_prev > pathPtr); + while (p_prev > pathPtr && *(p_prev - 1) == '/') + p_prev--; + + for (char* p_rd = p_prev; p_rd; p_rd = p_prev) { + Y_ASSERT(p_rd == pathEnd || p_rd[0] == '/'); + p_prev = nullptr; + + char* p = p_rd; + + if (p > pathPtr) { + for (p--; *p != '/'; p--) { + if (p == pathPtr) + break; + } + if (*p == '/') { + p_prev = p++; + if ((p_prev - pathPtr >= 6 && !strnicmp(p_prev - 6, "http://", 7)) || + (p_prev - pathPtr >= 7 && !strnicmp(p_prev - 7, "https://", 8))) { + --p_prev; + --p; + } else { + //skip multiple from head '/' + while (p_prev > pathPtr && *(p_prev - 1) == '/') + p_prev--; + } + } + } + + Y_ASSERT(p_prev == nullptr || p_prev[0] == '/'); + //and the first symbol !='/' after p_prev is p + + if (p == p_rd) { + //empty block: + if (p_prev) { //either tail: + Y_ASSERT(p_rd == p_wr && *(p - 1) == '/'); + --p_wr; + continue; + } else { //or head of abs path + *(--p_wr) = '/'; break; - } + } } - - if (p[0] == '.') { - if (p + 1 == p_rd) { - if (correctAbs || p_prev > pathPtr || pathPtr[0] != '/') - // ignore "./" - continue; - } else { - if ((p[1] == '.') && (p + 2 == p_rd)) { - // register "../" but not print - upCount++; - continue; - } + + if (p[0] == '.') { + if (p + 1 == p_rd) { + if (correctAbs || p_prev > pathPtr || pathPtr[0] != '/') + // ignore "./" + continue; + } else { + if ((p[1] == '.') && (p + 2 == p_rd)) { + // register "../" but not print + upCount++; + continue; + } } } - if (upCount) { - //unregister "../" and not print - upCount--; + if (upCount) { + //unregister "../" and not print + upCount--; continue; } - // print - Y_ASSERT(p < p_rd); - Y_ASSERT(!p_prev || *(p - 1) == '/'); - if (p_wr == p_rd) { //just skip - p_wr = p; - } else { //copy - int l = p_rd - p + 1; - p_wr -= l; - memmove(p_wr, p, l); + // print + Y_ASSERT(p < p_rd); + Y_ASSERT(!p_prev || *(p - 1) == '/'); + if (p_wr == p_rd) { //just skip + p_wr = p; + } else { //copy + int l = p_rd - p + 1; + p_wr -= l; + memmove(p_wr, p, l); } } - if (upCount) { - if (*pathPtr != '/') { - if (pathEnd == p_wr && *(p_wr - 1) == '.') { - Y_ASSERT(*(p_wr - 2) == '.'); - p_wr -= 2; - upCount--; - } - for (; upCount > 0; upCount--) { - *(--p_wr) = '/'; + if (upCount) { + if (*pathPtr != '/') { + if (pathEnd == p_wr && *(p_wr - 1) == '.') { + Y_ASSERT(*(p_wr - 2) == '.'); + p_wr -= 2; + upCount--; + } + for (; upCount > 0; upCount--) { + *(--p_wr) = '/'; *(--p_wr) = '.'; *(--p_wr) = '.'; } - } else { - if (correctAbs > 0) - return false; - if (correctAbs == 0) { - //Bad path but present in RFC: - // "Similarly, parsers must avoid treating "." and ".." - // as special when they are not complete components of - // a relative path. " - for (; upCount > 0; upCount--) { - *(--p_wr) = '.'; - *(--p_wr) = '.'; - *(--p_wr) = '/'; - } - } else { - upCount = false; - } + } else { + if (correctAbs > 0) + return false; + if (correctAbs == 0) { + //Bad path but present in RFC: + // "Similarly, parsers must avoid treating "." and ".." + // as special when they are not complete components of + // a relative path. " + for (; upCount > 0; upCount--) { + *(--p_wr) = '.'; + *(--p_wr) = '.'; + *(--p_wr) = '/'; + } + } else { + upCount = false; + } } } - Y_ASSERT(p_wr >= pathPtr); - - if (upCount) - return false; - pathPtr = p_wr; - return true; - } - - /********************************************************/ - const char* LinkTypeToString(const TUri::TLinkType& t) { - switch (t) { - case TUri::LinkIsBad: - return "LinkIsBad"; - case TUri::LinkBadAbs: - return "LinkBadAbs"; - case TUri::LinkIsFragment: - return "LinkIsFragment"; - case TUri::LinkIsLocal: - return "LinkIsLocal"; - case TUri::LinkIsGlobal: - return "LinkIsGlobal"; - } - Y_ASSERT(0); - return ""; + Y_ASSERT(p_wr >= pathPtr); + + if (upCount) + return false; + pathPtr = p_wr; + return true; + } + + /********************************************************/ + const char* LinkTypeToString(const TUri::TLinkType& t) { + switch (t) { + case TUri::LinkIsBad: + return "LinkIsBad"; + case TUri::LinkBadAbs: + return "LinkBadAbs"; + case TUri::LinkIsFragment: + return "LinkIsFragment"; + case TUri::LinkIsLocal: + return "LinkIsLocal"; + case TUri::LinkIsGlobal: + return "LinkIsGlobal"; + } + Y_ASSERT(0); + return ""; } } diff --git a/library/cpp/uri/uri.h b/library/cpp/uri/uri.h index 05908c4e96..3b6c19fe4a 100644 --- a/library/cpp/uri/uri.h +++ b/library/cpp/uri/uri.h @@ -2,625 +2,625 @@ #include "common.h" #include "encode.h" - + #include <library/cpp/charset/doccodes.h> #include <util/generic/buffer.h> #include <util/generic/ptr.h> #include <util/generic/singleton.h> #include <util/generic/string.h> #include <util/memory/alloc.h> -#include <util/stream/mem.h> +#include <util/stream/mem.h> #include <util/stream/output.h> -#include <util/stream/str.h> -#include <util/system/yassert.h> +#include <util/stream/str.h> +#include <util/system/yassert.h> #include <cstdlib> namespace NUri { - /********************************************************/ - class TUri - : public TFeature, - public TField, - public TScheme, - public TState { - public: - enum TLinkType { - LinkIsBad, - LinkBadAbs, - LinkIsFragment, - LinkIsLocal, - LinkIsGlobal - }; - - private: + /********************************************************/ + class TUri + : public TFeature, + public TField, + public TScheme, + public TState { + public: + enum TLinkType { + LinkIsBad, + LinkBadAbs, + LinkIsFragment, + LinkIsLocal, + LinkIsGlobal + }; + + private: TBuffer Buffer; - TStringBuf Fields[FieldAllMAX]; - ui32 FieldsSet; - ui16 Port; - ui16 DefaultPort; - TScheme::EKind Scheme; - /// contains fields out of buffer (and possibly not null-terminated) - ui32 FieldsDirty; - - private: - void Alloc(size_t len) { - Dealloc(); // to prevent copy below + TStringBuf Fields[FieldAllMAX]; + ui32 FieldsSet; + ui16 Port; + ui16 DefaultPort; + TScheme::EKind Scheme; + /// contains fields out of buffer (and possibly not null-terminated) + ui32 FieldsDirty; + + private: + void Alloc(size_t len) { + Dealloc(); // to prevent copy below Buffer.Resize(len); - } - void Dealloc() { + } + void Dealloc() { Buffer.Clear(); - } - - void ClearImpl() { - Port = 0; - FieldsSet = 0; - Scheme = SchemeEmpty; - FieldsDirty = 0; - } - - void CopyData(const TUri& url) { - FieldsSet = url.FieldsSet; - Port = url.Port; - DefaultPort = url.DefaultPort; - Scheme = url.Scheme; - FieldsDirty = url.FieldsDirty; - } - - void CopyImpl(const TUri& url) { - for (int i = 0; i < FieldAllMAX; ++i) - Fields[i] = url.Fields[i]; + } + + void ClearImpl() { + Port = 0; + FieldsSet = 0; + Scheme = SchemeEmpty; + FieldsDirty = 0; + } + + void CopyData(const TUri& url) { + FieldsSet = url.FieldsSet; + Port = url.Port; + DefaultPort = url.DefaultPort; + Scheme = url.Scheme; + FieldsDirty = url.FieldsDirty; + } + + void CopyImpl(const TUri& url) { + for (int i = 0; i < FieldAllMAX; ++i) + Fields[i] = url.Fields[i]; RewriteImpl(); - } - - private: - static ui32 FldFlag(EField fld) { - return 1 << fld; - } - - public: - static bool FldIsValid(EField fld) { - return 0 <= fld && FieldAllMAX > fld; - } - - bool FldSetCmp(ui32 chk, ui32 exp) const { - return (FieldsSet & chk) == exp; - } - - bool FldSetCmp(ui32 chk) const { - return FldSetCmp(chk, chk); - } - - bool FldIsSet(EField fld) const { - return !FldSetCmp(FldFlag(fld), 0); - } - - private: - void FldMarkSet(EField fld) { - FieldsSet |= FldFlag(fld); - } - - void FldMarkUnset(EField fld) { - FieldsSet &= ~FldFlag(fld); - } - - // use when we know the field is dirty or RewriteImpl will be called - void FldSetNoDirty(EField fld, const TStringBuf& value) { - Fields[fld] = value; - FldMarkSet(fld); - } - - void FldSet(EField fld, const TStringBuf& value) { - FldSetNoDirty(fld, value); - FldMarkDirty(fld); - } - - const TStringBuf& FldGet(EField fld) const { - return Fields[fld]; - } - - private: - /// depending on value, clears or sets it - void FldChkSet(EField fld, const TStringBuf& value) { - if (value.IsInited()) - FldSet(fld, value); - else - FldClr(fld); - } - void FldChkSet(EField fld, const TUri& other) { - FldChkSet(fld, other.GetField(fld)); - } - - /// set only if initialized - bool FldTrySet(EField fld, const TStringBuf& value) { - const bool ok = value.IsInited(); - if (ok) - FldSet(fld, value); - return ok; - } - bool FldTrySet(EField fld, const TUri& other) { - return FldTrySet(fld, other.GetField(fld)); - } - - private: - /// copies the value if it fits - bool FldTryCpy(EField fld, const TStringBuf& value); - - // main method: sets the field value, possibly copies, etc. - bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false); - - public: // clear a field - void FldClr(EField fld) { - Fields[fld].Clear(); - FldMarkUnset(fld); - FldMarkClean(fld); - } - - bool FldTryClr(EField field) { - const bool ok = FldIsSet(field); - if (ok) - FldClr(field); - return ok; - } - - public: // set a field value: might leave state dirty and require a Rewrite() - // copies if fits and not dirty, sets and marks dirty otherwise - bool FldMemCpy(EField field, const TStringBuf& value) { - return FldSetImpl(field, value, false); - } - - // uses directly, marks dirty - /// @note client MUST guarantee value will be alive until Rewrite is called - bool FldMemSet(EField field, const TStringBuf& value) { - return FldSetImpl(field, value, false, true); - } - - // uses directly, doesn't mark dirty (value scope exceeds "this") - bool FldMemUse(EField field, const TStringBuf& value) { - return FldSetImpl(field, value, true); - } - - // uses directly, doesn't mark dirty - template <size_t size> - bool FldMemSet(EField field, const char (&value)[size]) { + } + + private: + static ui32 FldFlag(EField fld) { + return 1 << fld; + } + + public: + static bool FldIsValid(EField fld) { + return 0 <= fld && FieldAllMAX > fld; + } + + bool FldSetCmp(ui32 chk, ui32 exp) const { + return (FieldsSet & chk) == exp; + } + + bool FldSetCmp(ui32 chk) const { + return FldSetCmp(chk, chk); + } + + bool FldIsSet(EField fld) const { + return !FldSetCmp(FldFlag(fld), 0); + } + + private: + void FldMarkSet(EField fld) { + FieldsSet |= FldFlag(fld); + } + + void FldMarkUnset(EField fld) { + FieldsSet &= ~FldFlag(fld); + } + + // use when we know the field is dirty or RewriteImpl will be called + void FldSetNoDirty(EField fld, const TStringBuf& value) { + Fields[fld] = value; + FldMarkSet(fld); + } + + void FldSet(EField fld, const TStringBuf& value) { + FldSetNoDirty(fld, value); + FldMarkDirty(fld); + } + + const TStringBuf& FldGet(EField fld) const { + return Fields[fld]; + } + + private: + /// depending on value, clears or sets it + void FldChkSet(EField fld, const TStringBuf& value) { + if (value.IsInited()) + FldSet(fld, value); + else + FldClr(fld); + } + void FldChkSet(EField fld, const TUri& other) { + FldChkSet(fld, other.GetField(fld)); + } + + /// set only if initialized + bool FldTrySet(EField fld, const TStringBuf& value) { + const bool ok = value.IsInited(); + if (ok) + FldSet(fld, value); + return ok; + } + bool FldTrySet(EField fld, const TUri& other) { + return FldTrySet(fld, other.GetField(fld)); + } + + private: + /// copies the value if it fits + bool FldTryCpy(EField fld, const TStringBuf& value); + + // main method: sets the field value, possibly copies, etc. + bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false); + + public: // clear a field + void FldClr(EField fld) { + Fields[fld].Clear(); + FldMarkUnset(fld); + FldMarkClean(fld); + } + + bool FldTryClr(EField field) { + const bool ok = FldIsSet(field); + if (ok) + FldClr(field); + return ok; + } + + public: // set a field value: might leave state dirty and require a Rewrite() + // copies if fits and not dirty, sets and marks dirty otherwise + bool FldMemCpy(EField field, const TStringBuf& value) { + return FldSetImpl(field, value, false); + } + + // uses directly, marks dirty + /// @note client MUST guarantee value will be alive until Rewrite is called + bool FldMemSet(EField field, const TStringBuf& value) { + return FldSetImpl(field, value, false, true); + } + + // uses directly, doesn't mark dirty (value scope exceeds "this") + bool FldMemUse(EField field, const TStringBuf& value) { + return FldSetImpl(field, value, true); + } + + // uses directly, doesn't mark dirty + template <size_t size> + bool FldMemSet(EField field, const char (&value)[size]) { static_assert(size > 0); return FldSetImpl(field, TStringBuf(value, size - 1), true); - } - - // duplicate one field to another - bool FldDup(EField src, EField dst) { - if (!FldIsSet(src) || !FldIsValid(dst)) - return false; - FldSetNoDirty(dst, FldGet(src)); - if (FldIsDirty(src)) - FldMarkDirty(dst); - else - FldMarkClean(dst); - return true; - } - - // move one field to another - bool FldMov(EField src, EField dst) { - if (!FldDup(src, dst)) - return false; - FldClr(src); - return true; - } - - private: - bool IsInBuffer(const char* buf) const { + } + + // duplicate one field to another + bool FldDup(EField src, EField dst) { + if (!FldIsSet(src) || !FldIsValid(dst)) + return false; + FldSetNoDirty(dst, FldGet(src)); + if (FldIsDirty(src)) + FldMarkDirty(dst); + else + FldMarkClean(dst); + return true; + } + + // move one field to another + bool FldMov(EField src, EField dst) { + if (!FldDup(src, dst)) + return false; + FldClr(src); + return true; + } + + private: + bool IsInBuffer(const char* buf) const { return buf >= Buffer.data() && buf < Buffer.data() + Buffer.size(); - } + } - public: - bool FldIsDirty() const { - return 0 != FieldsDirty; - } + public: + bool FldIsDirty() const { + return 0 != FieldsDirty; + } - bool FldIsDirty(EField fld) const { - return 0 != (FieldsDirty & FldFlag(fld)); - } + bool FldIsDirty(EField fld) const { + return 0 != (FieldsDirty & FldFlag(fld)); + } - private: - void FldMarkDirty(EField fld) { - FieldsDirty |= FldFlag(fld); - } + private: + void FldMarkDirty(EField fld) { + FieldsDirty |= FldFlag(fld); + } - void FldMarkClean(EField fld) { - FieldsDirty &= ~FldFlag(fld); - } + void FldMarkClean(EField fld) { + FieldsDirty &= ~FldFlag(fld); + } - void RewriteImpl(); + void RewriteImpl(); - public: - static TState::EParsed CheckHost(const TStringBuf& host); + public: + static TState::EParsed CheckHost(const TStringBuf& host); - // convert a [potential] IDN to ascii - static TMallocPtr<char> IDNToAscii(const wchar32* idna); - static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8); + // convert a [potential] IDN to ascii + static TMallocPtr<char> IDNToAscii(const wchar32* idna); + static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8); - // convert hosts with percent-encoded or extended chars + // convert hosts with percent-encoded or extended chars - // returns non-empty string if host can be converted to ASCII with given parameters - static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8); + // returns non-empty string if host can be converted to ASCII with given parameters + static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8); - // returns host if already ascii, or non-empty if it can be converted - static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8); + // returns host if already ascii, or non-empty if it can be converted + static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8); - public: - explicit TUri(unsigned defaultPort = 0) - : FieldsSet(0) - , Port(0) - , DefaultPort(static_cast<ui16>(defaultPort)) - , Scheme(SchemeEmpty) - , FieldsDirty(0) - { - } + public: + explicit TUri(unsigned defaultPort = 0) + : FieldsSet(0) + , Port(0) + , DefaultPort(static_cast<ui16>(defaultPort)) + , Scheme(SchemeEmpty) + , FieldsDirty(0) + { + } - TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0); + TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0); - TUri(const TUri& url) + TUri(const TUri& url) : FieldsSet(url.FieldsSet) - , Port(url.Port) - , DefaultPort(url.DefaultPort) - , Scheme(url.Scheme) - , FieldsDirty(url.FieldsDirty) - { - CopyImpl(url); - } + , Port(url.Port) + , DefaultPort(url.DefaultPort) + , Scheme(url.Scheme) + , FieldsDirty(url.FieldsDirty) + { + CopyImpl(url); + } + + ~TUri() { + Clear(); + } + + void Copy(const TUri& url) { + if (&url != this) { + CopyData(url); + CopyImpl(url); + } + } + + void Clear() { + Dealloc(); + ClearImpl(); + } + + ui32 GetFieldMask() const { + return FieldsSet; + } + + ui32 GetUrlFieldMask() const { + return GetFieldMask() & FlagUrlFields; + } + + ui32 GetDirtyMask() const { + return FieldsDirty; + } + + void CheckMissingFields(); + + // Process methods + + void Rewrite() { + if (FldIsDirty()) + RewriteImpl(); + } + + private: + TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty); + + TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8); + + public: + TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) { + const TState::EParsed ret = AssignImpl(parser, defscheme); + if (ParsedOK == ret) + Rewrite(); + return ret; + } + + TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { + const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); + if (ParsedOK == ret) + Rewrite(); + return ret; + } + + // parses absolute URIs + // prepends default scheme (unless unknown) if URI has none + TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8); + + TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { + return ParseAbsUri(url, flags, maxlen, SchemeHTTP, enc); + } + + TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8); + + TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) { + return ParseUri(url, flags); + } + + TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8); + + TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) { + const TState::EParsed result = Parse(url, flags, base_url, maxlen, enc); + return ParsedOK != result || IsValidGlobal() ? result : ParsedBadFormat; + } + + // correctAbs works with head "/.." portions: + // 1 - reject URL + // 0 - keep portions + // -1 - ignore portions + + void Merge(const TUri& base, int correctAbs = -1); - ~TUri() { - Clear(); - } - - void Copy(const TUri& url) { - if (&url != this) { - CopyData(url); - CopyImpl(url); - } - } + TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), long careFlags = FeaturesDefault, ECharset enc = CODES_UTF8); - void Clear() { - Dealloc(); - ClearImpl(); + private: + int PrintFlags(int flags) const { + if (0 == (FlagUrlFields & flags)) + flags |= FlagUrlFields; + return flags; } - - ui32 GetFieldMask() const { - return FieldsSet; - } - - ui32 GetUrlFieldMask() const { - return GetFieldMask() & FlagUrlFields; - } - - ui32 GetDirtyMask() const { - return FieldsDirty; - } - - void CheckMissingFields(); - - // Process methods - - void Rewrite() { - if (FldIsDirty()) - RewriteImpl(); - } - - private: - TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty); - - TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8); - - public: - TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) { - const TState::EParsed ret = AssignImpl(parser, defscheme); - if (ParsedOK == ret) - Rewrite(); - return ret; - } - - TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { - const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); - if (ParsedOK == ret) - Rewrite(); - return ret; - } - - // parses absolute URIs - // prepends default scheme (unless unknown) if URI has none - TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8); - - TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { - return ParseAbsUri(url, flags, maxlen, SchemeHTTP, enc); - } - - TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8); - - TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) { - return ParseUri(url, flags); - } - - TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8); - - TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) { - const TState::EParsed result = Parse(url, flags, base_url, maxlen, enc); - return ParsedOK != result || IsValidGlobal() ? result : ParsedBadFormat; - } - - // correctAbs works with head "/.." portions: - // 1 - reject URL - // 0 - keep portions - // -1 - ignore portions - - void Merge(const TUri& base, int correctAbs = -1); - - TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), long careFlags = FeaturesDefault, ECharset enc = CODES_UTF8); - - private: - int PrintFlags(int flags) const { - if (0 == (FlagUrlFields & flags)) - flags |= FlagUrlFields; - return flags; - } - - protected: - size_t PrintSize(ui32 flags) const; - - // Output method, prints to stream - IOutputStream& PrintImpl(IOutputStream& out, int flags) const; - - char* PrintImpl(char* str, size_t size, int flags) const { - TMemoryOutput out(str, size); - PrintImpl(out, flags) << '\0'; - return str; - } - - static bool IsAbsPath(const TStringBuf& path) { - return 1 <= path.length() && path[0] == '/'; - } - - bool IsAbsPathImpl() const { - return IsAbsPath(GetField(FieldPath)); - } - - public: - // Output method, prints to stream - IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const { - return PrintImpl(out, PrintFlags(flags)); - } - - // Output method, print to str, allocate memory if str is NULL - // Should be deprecated - char* Print(char* str, size_t size, int flags = FlagUrlFields) const { - return nullptr == str ? Serialize(flags) : Serialize(str, size, flags); - } - - char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const { - Y_ASSERT(str); - flags = PrintFlags(flags); - const size_t printSize = PrintSize(flags) + 1; - return printSize > size ? nullptr : PrintImpl(str, size, flags); - } - - char* Serialize(int flags = FlagUrlFields) const { - flags = PrintFlags(flags); - const size_t size = PrintSize(flags) + 1; - return PrintImpl(static_cast<char*>(malloc(size)), size, flags); - } - - // Output method to str - void Print(TString& str, int flags = FlagUrlFields) const { - flags = PrintFlags(flags); - str.reserve(str.length() + PrintSize(flags)); - TStringOutput out(str); - PrintImpl(out, flags); - } - - TString PrintS(int flags = FlagUrlFields) const { - TString str; - Print(str, flags); - return str; - } - - // Only non-default scheme and port are printed - char* PrintHost(char* str, size_t size) const { - return Print(str, size, (Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort); - } - TString PrintHostS() const { - return PrintS((Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort); - } - - // Info methods - int Compare(const TUri& A, int flags = FlagUrlFields) const; - - int CompareField(EField fld, const TUri& url) const; - - const TStringBuf& GetField(EField fld) const { - return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>(); - } - - ui16 GetPort() const { - return 0 == Port ? DefaultPort : Port; - } - - const TStringBuf& GetHost() const { - if (GetFieldMask() & FlagHostAscii) - return FldGet(FieldHostAscii); - if (GetFieldMask() & FlagHost) - return FldGet(FieldHost); - return Default<TStringBuf>(); - } - - bool UseHostAscii() { - return FldMov(FieldHostAscii, FieldHost); - } - - TScheme::EKind GetScheme() const { - return Scheme; - } - const TSchemeInfo& GetSchemeInfo() const { - return TSchemeInfo::Get(Scheme); - } - - bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const { - return !FldSetCmp(flags); - } - - bool IsNull(EField fld) const { - return !FldIsSet(fld); - } - - bool IsValidAbs() const { - if (IsNull(FlagScheme | FlagHost | FlagPath)) - return false; - return IsAbsPathImpl(); - } - - bool IsValidGlobal() const { - if (IsNull(FlagScheme | FlagHost)) - return false; - if (IsNull(FlagPath)) - return true; - return IsAbsPathImpl(); - } - - bool IsRootless() const { - return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl(); - } - - // for RFC 2396 compatibility - bool IsOpaque() const { - return IsRootless(); - } - - // Inline helpers - TUri& operator=(const TUri& u) { - Copy(u); - return *this; - } - - bool operator!() const { - return IsNull(); - } - - bool Equal(const TUri& A, int flags = FlagUrlFields) const { - return (Compare(A, flags) == 0); - } - - bool Less(const TUri& A, int flags = FlagUrlFields) const { - return (Compare(A, flags) < 0); - } - - bool operator==(const TUri& A) const { - return Equal(A, FlagNoFrag); - } - - bool operator!=(const TUri& A) const { - return !Equal(A, FlagNoFrag); - } - - bool operator<(const TUri& A) const { - return Less(A, FlagNoFrag); - } - - bool IsSameDocument(const TUri& other) const { - // pre: both *this and 'other' should be normalized to valid abs - Y_ASSERT(IsValidAbs()); - return Equal(other, FlagNoFrag); - } - - bool IsLocal(const TUri& other) const { - // pre: both *this and 'other' should be normalized to valid abs - Y_ASSERT(IsValidAbs() && other.IsValidAbs()); - return Equal(other, FlagScheme | FlagHostPort); - } - - TLinkType Locality(const TUri& other) const { - if (IsSameDocument(other)) - return LinkIsFragment; - else if (IsLocal(other)) - return LinkIsLocal; - return LinkIsGlobal; - } - - static IOutputStream& ReEncodeField(IOutputStream& out, const TStringBuf& val, EField fld, long flags = FeaturesEncodeDecode) { - return NEncode::TEncoder::ReEncode(out, val, NEncode::TEncodeMapper(flags, fld)); - } - - static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, long srcflags, EField dstfld, long dstflags) { - return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld)); - } - - static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, long flags = FeaturesEncodeDecode) { - return ReEncodeField(out, val, FieldAllMAX, flags); - } - - static int PathOperationFlag(const TParseFlags& flags) { - return flags & FeaturePathDenyRootParent ? 1 - : flags & FeaturePathStripRootParent ? -1 : 0; - } - - static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs); - - private: - const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) { - Scheme = info.Kind; - DefaultPort = info.Port; + + protected: + size_t PrintSize(ui32 flags) const; + + // Output method, prints to stream + IOutputStream& PrintImpl(IOutputStream& out, int flags) const; + + char* PrintImpl(char* str, size_t size, int flags) const { + TMemoryOutput out(str, size); + PrintImpl(out, flags) << '\0'; + return str; + } + + static bool IsAbsPath(const TStringBuf& path) { + return 1 <= path.length() && path[0] == '/'; + } + + bool IsAbsPathImpl() const { + return IsAbsPath(GetField(FieldPath)); + } + + public: + // Output method, prints to stream + IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const { + return PrintImpl(out, PrintFlags(flags)); + } + + // Output method, print to str, allocate memory if str is NULL + // Should be deprecated + char* Print(char* str, size_t size, int flags = FlagUrlFields) const { + return nullptr == str ? Serialize(flags) : Serialize(str, size, flags); + } + + char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const { + Y_ASSERT(str); + flags = PrintFlags(flags); + const size_t printSize = PrintSize(flags) + 1; + return printSize > size ? nullptr : PrintImpl(str, size, flags); + } + + char* Serialize(int flags = FlagUrlFields) const { + flags = PrintFlags(flags); + const size_t size = PrintSize(flags) + 1; + return PrintImpl(static_cast<char*>(malloc(size)), size, flags); + } + + // Output method to str + void Print(TString& str, int flags = FlagUrlFields) const { + flags = PrintFlags(flags); + str.reserve(str.length() + PrintSize(flags)); + TStringOutput out(str); + PrintImpl(out, flags); + } + + TString PrintS(int flags = FlagUrlFields) const { + TString str; + Print(str, flags); + return str; + } + + // Only non-default scheme and port are printed + char* PrintHost(char* str, size_t size) const { + return Print(str, size, (Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort); + } + TString PrintHostS() const { + return PrintS((Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort); + } + + // Info methods + int Compare(const TUri& A, int flags = FlagUrlFields) const; + + int CompareField(EField fld, const TUri& url) const; + + const TStringBuf& GetField(EField fld) const { + return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>(); + } + + ui16 GetPort() const { + return 0 == Port ? DefaultPort : Port; + } + + const TStringBuf& GetHost() const { + if (GetFieldMask() & FlagHostAscii) + return FldGet(FieldHostAscii); + if (GetFieldMask() & FlagHost) + return FldGet(FieldHost); + return Default<TStringBuf>(); + } + + bool UseHostAscii() { + return FldMov(FieldHostAscii, FieldHost); + } + + TScheme::EKind GetScheme() const { + return Scheme; + } + const TSchemeInfo& GetSchemeInfo() const { + return TSchemeInfo::Get(Scheme); + } + + bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const { + return !FldSetCmp(flags); + } + + bool IsNull(EField fld) const { + return !FldIsSet(fld); + } + + bool IsValidAbs() const { + if (IsNull(FlagScheme | FlagHost | FlagPath)) + return false; + return IsAbsPathImpl(); + } + + bool IsValidGlobal() const { + if (IsNull(FlagScheme | FlagHost)) + return false; + if (IsNull(FlagPath)) + return true; + return IsAbsPathImpl(); + } + + bool IsRootless() const { + return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl(); + } + + // for RFC 2396 compatibility + bool IsOpaque() const { + return IsRootless(); + } + + // Inline helpers + TUri& operator=(const TUri& u) { + Copy(u); + return *this; + } + + bool operator!() const { + return IsNull(); + } + + bool Equal(const TUri& A, int flags = FlagUrlFields) const { + return (Compare(A, flags) == 0); + } + + bool Less(const TUri& A, int flags = FlagUrlFields) const { + return (Compare(A, flags) < 0); + } + + bool operator==(const TUri& A) const { + return Equal(A, FlagNoFrag); + } + + bool operator!=(const TUri& A) const { + return !Equal(A, FlagNoFrag); + } + + bool operator<(const TUri& A) const { + return Less(A, FlagNoFrag); + } + + bool IsSameDocument(const TUri& other) const { + // pre: both *this and 'other' should be normalized to valid abs + Y_ASSERT(IsValidAbs()); + return Equal(other, FlagNoFrag); + } + + bool IsLocal(const TUri& other) const { + // pre: both *this and 'other' should be normalized to valid abs + Y_ASSERT(IsValidAbs() && other.IsValidAbs()); + return Equal(other, FlagScheme | FlagHostPort); + } + + TLinkType Locality(const TUri& other) const { + if (IsSameDocument(other)) + return LinkIsFragment; + else if (IsLocal(other)) + return LinkIsLocal; + return LinkIsGlobal; + } + + static IOutputStream& ReEncodeField(IOutputStream& out, const TStringBuf& val, EField fld, long flags = FeaturesEncodeDecode) { + return NEncode::TEncoder::ReEncode(out, val, NEncode::TEncodeMapper(flags, fld)); + } + + static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, long srcflags, EField dstfld, long dstflags) { + return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld)); + } + + static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, long flags = FeaturesEncodeDecode) { + return ReEncodeField(out, val, FieldAllMAX, flags); + } + + static int PathOperationFlag(const TParseFlags& flags) { + return flags & FeaturePathDenyRootParent ? 1 + : flags & FeaturePathStripRootParent ? -1 : 0; + } + + static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs); + + private: + const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) { + Scheme = info.Kind; + DefaultPort = info.Port; if (!info.Str.empty()) - FldSetNoDirty(FieldScheme, info.Str); - return info; - } - const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) { - return SetSchemeImpl(TSchemeInfo::Get(scheme)); - } - - public: - const TSchemeInfo& SetScheme(const TSchemeInfo& info) { - SetSchemeImpl(info); + FldSetNoDirty(FieldScheme, info.Str); + return info; + } + const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) { + return SetSchemeImpl(TSchemeInfo::Get(scheme)); + } + + public: + const TSchemeInfo& SetScheme(const TSchemeInfo& info) { + SetSchemeImpl(info); if (!info.Str.empty()) - FldMarkClean(FieldScheme); - return info; - } - const TSchemeInfo& SetScheme(TScheme::EKind scheme) { - return SetScheme(TSchemeInfo::Get(scheme)); - } - }; - - class TUriUpdate { - TUri& Uri_; - - public: - TUriUpdate(TUri& uri) - : Uri_(uri) - { - } - ~TUriUpdate() { - Uri_.Rewrite(); - } - - public: - bool Set(TField::EField field, const TStringBuf& value) { - return Uri_.FldMemSet(field, value); - } - - template <size_t size> - bool Set(TField::EField field, const char (&value)[size]) { - return Uri_.FldMemSet(field, value); - } - - void Clr(TField::EField field) { - Uri_.FldClr(field); - } - }; - - const char* LinkTypeToString(const TUri::TLinkType& t); + FldMarkClean(FieldScheme); + return info; + } + const TSchemeInfo& SetScheme(TScheme::EKind scheme) { + return SetScheme(TSchemeInfo::Get(scheme)); + } + }; + + class TUriUpdate { + TUri& Uri_; + + public: + TUriUpdate(TUri& uri) + : Uri_(uri) + { + } + ~TUriUpdate() { + Uri_.Rewrite(); + } + + public: + bool Set(TField::EField field, const TStringBuf& value) { + return Uri_.FldMemSet(field, value); + } + + template <size_t size> + bool Set(TField::EField field, const char (&value)[size]) { + return Uri_.FldMemSet(field, value); + } + + void Clr(TField::EField field) { + Uri_.FldClr(field); + } + }; + + const char* LinkTypeToString(const TUri::TLinkType& t); } -Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) { +Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) { url.Print(out); } -Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) { +Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) { out << NUri::LinkTypeToString(t); } diff --git a/library/cpp/uri/uri_ut.cpp b/library/cpp/uri/uri_ut.cpp index 36fa566ef9..2ebd83fc93 100644 --- a/library/cpp/uri/uri_ut.cpp +++ b/library/cpp/uri/uri_ut.cpp @@ -2,888 +2,888 @@ #include "other.h" #include "qargs.h" #include <library/cpp/html/entity/htmlentity.h> - + #include <util/system/maxlen.h> namespace NUri { Y_UNIT_TEST_SUITE(URLTest) { - static const char* urls[] = { - "http://a/b/c/d;p?q#r", - "g", "http://a/b/c/g", - "./g", "http://a/b/c/g", - "g/", "http://a/b/c/g/", - "/g", "http://a/g", - "//g", "http://g/", - "?y", "http://a/b/c/d;p?y", - "g?y", "http://a/b/c/g?y", - "#s", "http://a/b/c/d;p?q#s", - "g#s", "http://a/b/c/g#s", - "g?y#s", "http://a/b/c/g?y#s", - ";x", "http://a/b/c/;x", - "g;x", "http://a/b/c/g;x", - "g;x?y#s", "http://a/b/c/g;x?y#s", - ".", "http://a/b/c/", - "./", "http://a/b/c/", - "./.", "http://a/b/c/", - "././", "http://a/b/c/", - "././.", "http://a/b/c/", - "..", "http://a/b/", - "../", "http://a/b/", - "../.", "http://a/b/", - "../g", "http://a/b/g", - "../..", "http://a/", - "../../", "http://a/", - "../../.", "http://a/", - "../../g", "http://a/g", - "../../../g", "http://a/g", - "../../../../g", "http://a/g", - "/./g", "http://a/g", - "g.", "http://a/b/c/g.", - ".g", "http://a/b/c/.g", - "g..", "http://a/b/c/g..", - "..g", "http://a/b/c/..g", - "./../g", "http://a/b/g", - "./g/.", "http://a/b/c/g/", - "g/./h", "http://a/b/c/g/h", - "g/../h", "http://a/b/c/h", - "g;x=1/./y", "http://a/b/c/g;x=1/y", - "g;x=1/../y", "http://a/b/c/y", - "g?y/./x", "http://a/b/c/g?y/./x", - "g?y/../x", "http://a/b/c/g?y/../x", - "g#s/./x", "http://a/b/c/g#s/./x", - "g#s/../x", "http://a/b/c/g#s/../x", - "?", "http://a/b/c/d;p?", - "/?", "http://a/?", - "x?", "http://a/b/c/x?", - "x%20y", "http://a/b/c/x%20y", - "%20y", "http://a/b/c/%20y", - // "%2zy", "http://a/b/c/%2zy", - nullptr}; + static const char* urls[] = { + "http://a/b/c/d;p?q#r", + "g", "http://a/b/c/g", + "./g", "http://a/b/c/g", + "g/", "http://a/b/c/g/", + "/g", "http://a/g", + "//g", "http://g/", + "?y", "http://a/b/c/d;p?y", + "g?y", "http://a/b/c/g?y", + "#s", "http://a/b/c/d;p?q#s", + "g#s", "http://a/b/c/g#s", + "g?y#s", "http://a/b/c/g?y#s", + ";x", "http://a/b/c/;x", + "g;x", "http://a/b/c/g;x", + "g;x?y#s", "http://a/b/c/g;x?y#s", + ".", "http://a/b/c/", + "./", "http://a/b/c/", + "./.", "http://a/b/c/", + "././", "http://a/b/c/", + "././.", "http://a/b/c/", + "..", "http://a/b/", + "../", "http://a/b/", + "../.", "http://a/b/", + "../g", "http://a/b/g", + "../..", "http://a/", + "../../", "http://a/", + "../../.", "http://a/", + "../../g", "http://a/g", + "../../../g", "http://a/g", + "../../../../g", "http://a/g", + "/./g", "http://a/g", + "g.", "http://a/b/c/g.", + ".g", "http://a/b/c/.g", + "g..", "http://a/b/c/g..", + "..g", "http://a/b/c/..g", + "./../g", "http://a/b/g", + "./g/.", "http://a/b/c/g/", + "g/./h", "http://a/b/c/g/h", + "g/../h", "http://a/b/c/h", + "g;x=1/./y", "http://a/b/c/g;x=1/y", + "g;x=1/../y", "http://a/b/c/y", + "g?y/./x", "http://a/b/c/g?y/./x", + "g?y/../x", "http://a/b/c/g?y/../x", + "g#s/./x", "http://a/b/c/g#s/./x", + "g#s/../x", "http://a/b/c/g#s/../x", + "?", "http://a/b/c/d;p?", + "/?", "http://a/?", + "x?", "http://a/b/c/x?", + "x%20y", "http://a/b/c/x%20y", + "%20y", "http://a/b/c/%20y", + // "%2zy", "http://a/b/c/%2zy", + nullptr}; Y_UNIT_TEST(test_httpURL) { - TUri rel, base, abs; - TState::EParsed er = base.Parse(urls[0]); - UNIT_ASSERT_VALUES_EQUAL(er, TState::ParsedOK); - UNIT_ASSERT(base.IsValidAbs()); - UNIT_ASSERT_VALUES_EQUAL(base.PrintS(), urls[0]); - - TString errbuf; - TStringOutput out(errbuf); - const long mflag = TFeature::FeaturesAll; - for (int i = 1; urls[i]; i += 2) { - er = rel.Parse(urls[i]); - UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, urls[i]); - rel.Merge(base); - UNIT_ASSERT_VALUES_EQUAL_C(rel.PrintS(), urls[i + 1], urls[i]); - - // try the same thing differently - er = rel.Parse(urls[i], mflag, urls[0]); - UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, urls[i]); - UNIT_ASSERT_VALUES_EQUAL_C(rel.PrintS(), urls[i + 1], urls[i]); - - // lastly... - er = abs.Parse(urls[i + 1], mflag); - UNIT_ASSERT_VALUES_EQUAL(er, TState::ParsedOK); - errbuf.clear(); - out << '[' << rel.PrintS() - << "] != [" << abs.PrintS() << ']'; - UNIT_ASSERT_EQUAL_C(rel, abs, errbuf); - } - } + TUri rel, base, abs; + TState::EParsed er = base.Parse(urls[0]); + UNIT_ASSERT_VALUES_EQUAL(er, TState::ParsedOK); + UNIT_ASSERT(base.IsValidAbs()); + UNIT_ASSERT_VALUES_EQUAL(base.PrintS(), urls[0]); + + TString errbuf; + TStringOutput out(errbuf); + const long mflag = TFeature::FeaturesAll; + for (int i = 1; urls[i]; i += 2) { + er = rel.Parse(urls[i]); + UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, urls[i]); + rel.Merge(base); + UNIT_ASSERT_VALUES_EQUAL_C(rel.PrintS(), urls[i + 1], urls[i]); + + // try the same thing differently + er = rel.Parse(urls[i], mflag, urls[0]); + UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, urls[i]); + UNIT_ASSERT_VALUES_EQUAL_C(rel.PrintS(), urls[i + 1], urls[i]); + + // lastly... + er = abs.Parse(urls[i + 1], mflag); + UNIT_ASSERT_VALUES_EQUAL(er, TState::ParsedOK); + errbuf.clear(); + out << '[' << rel.PrintS() + << "] != [" << abs.PrintS() << ']'; + UNIT_ASSERT_EQUAL_C(rel, abs, errbuf); + } + } Y_UNIT_TEST(test_Schemes) { - TUri url; - UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeEmpty); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.ya.ru"), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeHTTP); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("https://www.ya.ru"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("https://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeKnown), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeHTTPS); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpwhatever://www.ya.ru"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpwhatever://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpswhatever://www.ya.ru"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpswhatever://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("ftp://www.ya.ru"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("ftp://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeFTP); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpsssss://www.ya.ru"), TState::ParsedBadScheme); - UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpsssss://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); - } - - struct Link4Norm { - const char* const base; - const char* const link; - const char* const result; - TUri::TLinkType ltype; - }; - - static const Link4Norm link4Norm[] = { - {"http://www.alltest.ru/all.php?a=aberporth", "http://www.alltest.ru/all.php?a=domestic jobs", "", TUri::LinkIsBad}, - {"http://www.alltest.ru/all.php?a=aberporth", "http://www.alltest.ru/all.php?a=domestic%20jobs", "http://www.alltest.ru/all.php?a=domestic%20jobs", TUri::LinkIsLocal}, - {"http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8", "http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/1024", "http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/1024", TUri::LinkIsLocal}, - {nullptr, nullptr, nullptr, TUri::LinkIsBad}, - }; + TUri url; + UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeEmpty); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.ya.ru"), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeHTTP); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("https://www.ya.ru"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("https://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeKnown), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeHTTPS); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpwhatever://www.ya.ru"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpwhatever://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpswhatever://www.ya.ru"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpswhatever://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("ftp://www.ya.ru"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("ftp://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeFTP); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpsssss://www.ya.ru"), TState::ParsedBadScheme); + UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpsssss://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); + } + + struct Link4Norm { + const char* const base; + const char* const link; + const char* const result; + TUri::TLinkType ltype; + }; + + static const Link4Norm link4Norm[] = { + {"http://www.alltest.ru/all.php?a=aberporth", "http://www.alltest.ru/all.php?a=domestic jobs", "", TUri::LinkIsBad}, + {"http://www.alltest.ru/all.php?a=aberporth", "http://www.alltest.ru/all.php?a=domestic%20jobs", "http://www.alltest.ru/all.php?a=domestic%20jobs", TUri::LinkIsLocal}, + {"http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8", "http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/1024", "http://president.rf/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/1024", TUri::LinkIsLocal}, + {nullptr, nullptr, nullptr, TUri::LinkIsBad}, + }; Y_UNIT_TEST(test_httpURLNormalize) { - TUri normalizedLink; - - for (int i = 0; link4Norm[i].link; i++) { - TUri base; - TState::EParsed er = base.Parse(link4Norm[i].base); - UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, link4Norm[i].base); - TUri::TLinkType ltype = normalizedLink.Normalize(base, link4Norm[i].link); - UNIT_ASSERT_VALUES_EQUAL_C(ltype, link4Norm[i].ltype, link4Norm[i].link); - TString s = TUri::LinkIsBad == ltype ? "" : normalizedLink.PrintS(); - UNIT_ASSERT_VALUES_EQUAL_C(s, link4Norm[i].result, link4Norm[i].link); - } - } - - static const char* urlsWithMultipleSlash[] = { - "http://a/http://b", "http://a/http://b", - "http://a/https://b", "http://a/https://b", - "http://a/b://c", "http://a/b:/c", - "http://a/b//c", "http://a/b/c", - nullptr, nullptr}; + TUri normalizedLink; + + for (int i = 0; link4Norm[i].link; i++) { + TUri base; + TState::EParsed er = base.Parse(link4Norm[i].base); + UNIT_ASSERT_VALUES_EQUAL_C(er, TState::ParsedOK, link4Norm[i].base); + TUri::TLinkType ltype = normalizedLink.Normalize(base, link4Norm[i].link); + UNIT_ASSERT_VALUES_EQUAL_C(ltype, link4Norm[i].ltype, link4Norm[i].link); + TString s = TUri::LinkIsBad == ltype ? "" : normalizedLink.PrintS(); + UNIT_ASSERT_VALUES_EQUAL_C(s, link4Norm[i].result, link4Norm[i].link); + } + } + + static const char* urlsWithMultipleSlash[] = { + "http://a/http://b", "http://a/http://b", + "http://a/https://b", "http://a/https://b", + "http://a/b://c", "http://a/b:/c", + "http://a/b//c", "http://a/b/c", + nullptr, nullptr}; Y_UNIT_TEST(test_httpURLPathOperation) { - char copyUrl[URL_MAXLEN]; - for (int i = 0; urlsWithMultipleSlash[i]; i += 2) { - const TStringBuf url(urlsWithMultipleSlash[i]); - const TStringBuf normurl(urlsWithMultipleSlash[i + 1]); - memcpy(copyUrl, url.data(), url.length()); - char* p = copyUrl; - char* e = copyUrl + url.length(); - TUri::PathOperation(p, e, 1); - UNIT_ASSERT_VALUES_EQUAL(TStringBuf(p, e), normurl); - TUri uri; - UNIT_ASSERT_VALUES_EQUAL(TState::ParsedOK, uri.Parse(url)); - UNIT_ASSERT_VALUES_EQUAL_C(uri.PrintS(), normurl, url); - } - } - - static const char* hostsForCheckHost[] = { - "simplehost.ru", - "third_level.host.ru", - "_ok.somewhere.ru", - "a.b", - "second_level.ru", - "_bad.ru", - "_", - "yandex.ru:443", - nullptr}; - - static TState::EParsed answersForCheckHost[] = { - TState::ParsedOK, - TState::ParsedOK, - TState::ParsedOK, - TState::ParsedOK, - TState::ParsedBadHost, - TState::ParsedBadHost, - TState::ParsedBadHost, - TState::ParsedBadHost, - }; + char copyUrl[URL_MAXLEN]; + for (int i = 0; urlsWithMultipleSlash[i]; i += 2) { + const TStringBuf url(urlsWithMultipleSlash[i]); + const TStringBuf normurl(urlsWithMultipleSlash[i + 1]); + memcpy(copyUrl, url.data(), url.length()); + char* p = copyUrl; + char* e = copyUrl + url.length(); + TUri::PathOperation(p, e, 1); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(p, e), normurl); + TUri uri; + UNIT_ASSERT_VALUES_EQUAL(TState::ParsedOK, uri.Parse(url)); + UNIT_ASSERT_VALUES_EQUAL_C(uri.PrintS(), normurl, url); + } + } + + static const char* hostsForCheckHost[] = { + "simplehost.ru", + "third_level.host.ru", + "_ok.somewhere.ru", + "a.b", + "second_level.ru", + "_bad.ru", + "_", + "yandex.ru:443", + nullptr}; + + static TState::EParsed answersForCheckHost[] = { + TState::ParsedOK, + TState::ParsedOK, + TState::ParsedOK, + TState::ParsedOK, + TState::ParsedBadHost, + TState::ParsedBadHost, + TState::ParsedBadHost, + TState::ParsedBadHost, + }; Y_UNIT_TEST(test_httpURLCheckHost) { - for (size_t index = 0; hostsForCheckHost[index]; ++index) { - TState::EParsed state = TUri::CheckHost(hostsForCheckHost[index]); - UNIT_ASSERT_VALUES_EQUAL(state, answersForCheckHost[index]); - } - } + for (size_t index = 0; hostsForCheckHost[index]; ++index) { + TState::EParsed state = TUri::CheckHost(hostsForCheckHost[index]); + UNIT_ASSERT_VALUES_EQUAL(state, answersForCheckHost[index]); + } + } Y_UNIT_TEST(test_httpURLSet) { - // set port - { - TUri parsedUrl; - parsedUrl.Parse("http://www.host.com/script.cgi?param1=value1¶m2=value2"); - parsedUrl.FldMemSet(TField::FieldPort, "8080"); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 8080); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); - } - - // clear port - { - TUri parsedUrl; - parsedUrl.Parse("http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); - parsedUrl.FldMemSet(TField::FieldPort, nullptr); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 80); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "http://www.host.com/script.cgi?param1=value1¶m2=value2"); - } - - // change scheme with default port - { - TUri parsedUrl; - parsedUrl.Parse("http://www.host.com/script.cgi?param1=value1¶m2=value2"); - parsedUrl.FldMemSet(TField::FieldScheme, "https"); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 443); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "https://www.host.com/script.cgi?param1=value1¶m2=value2"); - } - - // change scheme with non-default port - { - TUri parsedUrl; - parsedUrl.Parse("http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); - parsedUrl.FldMemSet(TField::FieldScheme, "https"); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 8080); - UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "https://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); - } - } + // set port + { + TUri parsedUrl; + parsedUrl.Parse("http://www.host.com/script.cgi?param1=value1¶m2=value2"); + parsedUrl.FldMemSet(TField::FieldPort, "8080"); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 8080); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); + } + + // clear port + { + TUri parsedUrl; + parsedUrl.Parse("http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); + parsedUrl.FldMemSet(TField::FieldPort, nullptr); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 80); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "http://www.host.com/script.cgi?param1=value1¶m2=value2"); + } + + // change scheme with default port + { + TUri parsedUrl; + parsedUrl.Parse("http://www.host.com/script.cgi?param1=value1¶m2=value2"); + parsedUrl.FldMemSet(TField::FieldScheme, "https"); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 443); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "https://www.host.com/script.cgi?param1=value1¶m2=value2"); + } + + // change scheme with non-default port + { + TUri parsedUrl; + parsedUrl.Parse("http://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); + parsedUrl.FldMemSet(TField::FieldScheme, "https"); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.GetPort(), 8080); + UNIT_ASSERT_VALUES_EQUAL(parsedUrl.PrintS(), "https://www.host.com:8080/script.cgi?param1=value1¶m2=value2"); + } + } Y_UNIT_TEST(test_httpURLAuth) { - { - TUri parsedUrl; - TState::EParsed st = parsedUrl.Parse("http://@www.host.com/path", TFeature::FeaturesRobot); - UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedBadAuth); - } - - { - TUri parsedUrl; - TState::EParsed st = parsedUrl.Parse("http://loginwithnopass@www.host.com/path", TFeature::FeatureAuthSupported); - UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedOK); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldHost), "www.host.com"); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldUser), "loginwithnopass"); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldPass), ""); - } - - { - TUri parsedUrl; - TState::EParsed st = parsedUrl.Parse("http://login:pass@www.host.com/path", TFeature::FeatureAuthSupported); - UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedOK); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldHost), "www.host.com"); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldUser), "login"); - UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldPass), "pass"); - } - } + { + TUri parsedUrl; + TState::EParsed st = parsedUrl.Parse("http://@www.host.com/path", TFeature::FeaturesRobot); + UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedBadAuth); + } + + { + TUri parsedUrl; + TState::EParsed st = parsedUrl.Parse("http://loginwithnopass@www.host.com/path", TFeature::FeatureAuthSupported); + UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedOK); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldHost), "www.host.com"); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldUser), "loginwithnopass"); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldPass), ""); + } + + { + TUri parsedUrl; + TState::EParsed st = parsedUrl.Parse("http://login:pass@www.host.com/path", TFeature::FeatureAuthSupported); + UNIT_ASSERT_VALUES_EQUAL(st, TState::ParsedOK); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldHost), "www.host.com"); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldUser), "login"); + UNIT_ASSERT_EQUAL(parsedUrl.GetField(TField::FieldPass), "pass"); + } + } Y_UNIT_TEST(test01) { - TTest test = { - "user:pass@host:8080", TFeature::FeaturesAll, TState::ParsedRootless, "user", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } + TTest test = { + "user:pass@host:8080", TFeature::FeaturesAll, TState::ParsedRootless, "user", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } Y_UNIT_TEST(test02) { - TTest test = { - "http://host", TFeature::FeaturesAll, TState::ParsedOK, "http", "", "", "host", 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } + TTest test = { + "http://host", TFeature::FeaturesAll, TState::ParsedOK, "http", "", "", "host", 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } Y_UNIT_TEST(test03) { - TTest test = { - "https://host", TFeature::FeatureSchemeFlexible | TFeature::FeatureAllowHostIDN, TState::ParsedOK, "https", "", "", "host", 443, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } + TTest test = { + "https://host", TFeature::FeatureSchemeFlexible | TFeature::FeatureAllowHostIDN, TState::ParsedOK, "https", "", "", "host", 443, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } Y_UNIT_TEST(test04) { - TTest test = { - "user:pass@host:8080", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "user", "", "", "", 0, "pass@host:8080", "", ""}; - TUri url; - URL_TEST(url, test); - TUri url2(url); - CMP_URL(url2, test); - URL_EQ(url, url2); - } + TTest test = { + "user:pass@host:8080", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "user", "", "", "", 0, "pass@host:8080", "", ""}; + TUri url; + URL_TEST(url, test); + TUri url2(url); + CMP_URL(url2, test); + URL_EQ(url, url2); + } Y_UNIT_TEST(test05) { - TTest test = { - "host:8080", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "host", "", "", "", 0, "8080", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "host:8080"); - } + TTest test = { + "host:8080", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "host", "", "", "", 0, "8080", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "host:8080"); + } Y_UNIT_TEST(test06) { - TTest test = { - "http://user:pass@host?q", TFeature::FeaturesAll, TState::ParsedOK, "http", "user", "pass", "host", 80, "/", "q", ""}; - TUri url; - URL_TEST(url, test); - url.FldMemSet(TField::FieldScheme, "https"); - UNIT_ASSERT(!url.FldIsDirty()); - UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldScheme), "https"); - UNIT_ASSERT_VALUES_EQUAL(url.GetPort(), 443); - - // test copying - TUri url2(url); - // make sure strings are equal... - UNIT_ASSERT_VALUES_EQUAL( - url.GetField(TField::FieldUser), - url2.GetField(TField::FieldUser)); - // ... and memory locations are the same - UNIT_ASSERT_EQUAL( - url.GetField(TField::FieldUser), - url2.GetField(TField::FieldUser)); - // and urls compare the same - URL_EQ(url, url2); - - // cause a dirty field - url.FldMemSet(TField::FieldUser, "use"); // it is now shorter - UNIT_ASSERT(!url.FldIsDirty()); + TTest test = { + "http://user:pass@host?q", TFeature::FeaturesAll, TState::ParsedOK, "http", "user", "pass", "host", 80, "/", "q", ""}; + TUri url; + URL_TEST(url, test); + url.FldMemSet(TField::FieldScheme, "https"); + UNIT_ASSERT(!url.FldIsDirty()); + UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldScheme), "https"); + UNIT_ASSERT_VALUES_EQUAL(url.GetPort(), 443); + + // test copying + TUri url2(url); + // make sure strings are equal... + UNIT_ASSERT_VALUES_EQUAL( + url.GetField(TField::FieldUser), + url2.GetField(TField::FieldUser)); + // ... and memory locations are the same + UNIT_ASSERT_EQUAL( + url.GetField(TField::FieldUser), + url2.GetField(TField::FieldUser)); + // and urls compare the same + URL_EQ(url, url2); + + // cause a dirty field + url.FldMemSet(TField::FieldUser, "use"); // it is now shorter + UNIT_ASSERT(!url.FldIsDirty()); url.FldMemSet(TField::FieldUser, TStringBuf("user")); - UNIT_ASSERT(url.FldIsDirty()); - - // copy again - url2 = url; - UNIT_ASSERT(url.FldIsDirty()); - UNIT_ASSERT(!url2.FldIsDirty()); - URL_EQ(url, url2); - // make sure strings are equal... - UNIT_ASSERT_VALUES_EQUAL( - url.GetField(TField::FieldUser), - url2.GetField(TField::FieldUser)); - // ... but memory locations are different - UNIT_ASSERT_UNEQUAL( - url.GetField(TField::FieldUser).data(), - url2.GetField(TField::FieldUser).data()); - URL_EQ(url, url2); - - // make query empty - url.FldMemSet(TField::FieldQuery, ""); - url2 = url; - URL_EQ(url, url2); - // set query to null value (should clear it) - url2.FldMemSet(TField::FieldQuery, TStringBuf()); - // make sure they are no longer equal - URL_NEQ(url, url2); - // reset query - url.FldClr(TField::FieldQuery); - // equal again - URL_EQ(url, url2); - // reset port and set the other to default - url.FldClr(TField::FieldPort); - url2.FldMemSet(TField::FieldPort, "443"); - URL_EQ(url, url2); - } + UNIT_ASSERT(url.FldIsDirty()); + + // copy again + url2 = url; + UNIT_ASSERT(url.FldIsDirty()); + UNIT_ASSERT(!url2.FldIsDirty()); + URL_EQ(url, url2); + // make sure strings are equal... + UNIT_ASSERT_VALUES_EQUAL( + url.GetField(TField::FieldUser), + url2.GetField(TField::FieldUser)); + // ... but memory locations are different + UNIT_ASSERT_UNEQUAL( + url.GetField(TField::FieldUser).data(), + url2.GetField(TField::FieldUser).data()); + URL_EQ(url, url2); + + // make query empty + url.FldMemSet(TField::FieldQuery, ""); + url2 = url; + URL_EQ(url, url2); + // set query to null value (should clear it) + url2.FldMemSet(TField::FieldQuery, TStringBuf()); + // make sure they are no longer equal + URL_NEQ(url, url2); + // reset query + url.FldClr(TField::FieldQuery); + // equal again + URL_EQ(url, url2); + // reset port and set the other to default + url.FldClr(TField::FieldPort); + url2.FldMemSet(TField::FieldPort, "443"); + URL_EQ(url, url2); + } Y_UNIT_TEST(test07) { - { - TTest test = { - "http://host/path//", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "http", "", "", "host", 80, "/path/", "", ""}; - TUri url; - URL_TEST(url, test); - url.FldMemSet(TField::FieldScheme, "HTTPs"); - UNIT_ASSERT_EQUAL(TScheme::SchemeHTTPS, url.GetScheme()); - UNIT_ASSERT_EQUAL("https", url.GetField(TField::FieldScheme)); - url.FldMemSet(TField::FieldScheme, "HtTP"); - UNIT_ASSERT_EQUAL(TScheme::SchemeHTTP, url.GetScheme()); - UNIT_ASSERT_EQUAL("http", url.GetField(TField::FieldScheme)); - } - - { - const TString scheme = "http"; - const TString host = "host.com"; - const TString urlstr = scheme + "://" + host; - TTest test = { - urlstr, TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, scheme, "", "", host, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), urlstr + "/"); - } - } + { + TTest test = { + "http://host/path//", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "http", "", "", "host", 80, "/path/", "", ""}; + TUri url; + URL_TEST(url, test); + url.FldMemSet(TField::FieldScheme, "HTTPs"); + UNIT_ASSERT_EQUAL(TScheme::SchemeHTTPS, url.GetScheme()); + UNIT_ASSERT_EQUAL("https", url.GetField(TField::FieldScheme)); + url.FldMemSet(TField::FieldScheme, "HtTP"); + UNIT_ASSERT_EQUAL(TScheme::SchemeHTTP, url.GetScheme()); + UNIT_ASSERT_EQUAL("http", url.GetField(TField::FieldScheme)); + } + + { + const TString scheme = "http"; + const TString host = "host.com"; + const TString urlstr = scheme + "://" + host; + TTest test = { + urlstr, TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, scheme, "", "", host, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), urlstr + "/"); + } + } Y_UNIT_TEST(test08) { - { - TTest test = { - "mailto://user@host.com", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "mailto", "user", "", "host.com", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "host:/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "host", "", "", "", 0, "/path/.path/", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "host:1/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "host", 1, "/path/.path/", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "host:1/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "host", "", "", "", 0, "1/path/.path/.", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "/[foo]:bar", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "/[foo]:bar", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - ".", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - ".", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "././.", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "././.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "./path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "path", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "./path", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "path", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "../path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "../path", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "../path", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "../path", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "/../path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "/path", "", ""}; - TUri url; - URL_TEST(url, test); - } - } + { + TTest test = { + "mailto://user@host.com", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "mailto", "user", "", "host.com", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "host:/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "host", "", "", "", 0, "/path/.path/", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "host:1/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "host", 1, "/path/.path/", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "host:1/path/.path/.", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "host", "", "", "", 0, "1/path/.path/.", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "/[foo]:bar", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "/[foo]:bar", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + ".", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + ".", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "././.", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "././.", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "./path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "path", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "./path", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "path", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "../path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "../path", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "../path", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "", "", "", "", 0, "../path", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "/../path", TFeature::FeaturesAll, TState::ParsedOK, "", "", "", "", 0, "/path", "", ""}; + TUri url; + URL_TEST(url, test); + } + } Y_UNIT_TEST(test09) { - { - TTest test = { - "mailto:user@host.com", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "mailto", "", "", "", 0, "user@host.com", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "scheme:", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "scheme", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - { - TTest test = { - "scheme:", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "scheme", "", "", "", 0, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - } + { + TTest test = { + "mailto:user@host.com", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "mailto", "", "", "", 0, "user@host.com", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "scheme:", TFeature::FeaturesAll | TFeature::FeatureNoRelPath | TFeature::FeatureAllowRootless, TState::ParsedOK, "scheme", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + { + TTest test = { + "scheme:", TFeature::FeaturesAll | TFeature::FeatureAllowRootless, TState::ParsedOK, "scheme", "", "", "", 0, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + } Y_UNIT_TEST(test10) { - // test some escaping madness, note the ehost vs host - { - TString host = "президент.рф"; - TString ehost = "%D0%BF%D1%80%D0%B5%D0%B7%D0%B8%D0%B4%D0%B5%D0%BD%D1%82.%D1%80%D1%84"; - const TString urlstr = TString::Join("http://", host, "/"); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault | TFeature::FeatureCheckHost, TState::ParsedBadHost, "http", "", "", ehost, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } - - { - TString host = "%D0%BF%D1%80%D0%B5%D0%B7%D0%B8%D0%B4%D0%B5%D0%BD%D1%82.%D1%80%D1%84"; - const TString urlstr = TString::Join("http://", host, "/"); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault | TFeature::FeatureCheckHost, TState::ParsedBadHost, "http", "", "", host, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } - - { - TString host = "Фilip.ru"; - TString ehost = "%D0%A4ilip.ru"; - const TString urlstr = TString::Join("http://", host); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault, TState::ParsedBadHost, "http", "", "", ehost, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } - - { - TString host = "%D0%A4ilip.ru"; - const TString urlstr = TString::Join("http://", host); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault, TState::ParsedBadHost, "http", "", "", host, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - } - - { - TString host = "Filip%90.rЯ"; - TString ehost = "Filip%90.r%D0%AF"; - const TString urlstr = TString::Join(host, ":8080"); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeAllowed | TFeature::FeaturesDefault | TFeature::FeatureNoRelPath, TState::ParsedBadHost, "", "", "", ehost, 8080, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - - { - TString host = "Filip%90.r%D0%AF"; - const TString urlstr = TString::Join(host, ":8080"); - TTest test = { - urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeAllowed | TFeature::FeaturesDefault | TFeature::FeatureNoRelPath, TState::ParsedBadHost, "", "", "", host, 8080, "", "", ""}; - TUri url; - URL_TEST(url, test); - } - } + // test some escaping madness, note the ehost vs host + { + TString host = "президент.рф"; + TString ehost = "%D0%BF%D1%80%D0%B5%D0%B7%D0%B8%D0%B4%D0%B5%D0%BD%D1%82.%D1%80%D1%84"; + const TString urlstr = TString::Join("http://", host, "/"); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault | TFeature::FeatureCheckHost, TState::ParsedBadHost, "http", "", "", ehost, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } + + { + TString host = "%D0%BF%D1%80%D0%B5%D0%B7%D0%B8%D0%B4%D0%B5%D0%BD%D1%82.%D1%80%D1%84"; + const TString urlstr = TString::Join("http://", host, "/"); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault | TFeature::FeatureCheckHost, TState::ParsedBadHost, "http", "", "", host, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } + + { + TString host = "Фilip.ru"; + TString ehost = "%D0%A4ilip.ru"; + const TString urlstr = TString::Join("http://", host); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault, TState::ParsedBadHost, "http", "", "", ehost, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } + + { + TString host = "%D0%A4ilip.ru"; + const TString urlstr = TString::Join("http://", host); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeaturesDefault, TState::ParsedBadHost, "http", "", "", host, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + } + + { + TString host = "Filip%90.rЯ"; + TString ehost = "Filip%90.r%D0%AF"; + const TString urlstr = TString::Join(host, ":8080"); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeAllowed | TFeature::FeaturesDefault | TFeature::FeatureNoRelPath, TState::ParsedBadHost, "", "", "", ehost, 8080, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + + { + TString host = "Filip%90.r%D0%AF"; + const TString urlstr = TString::Join(host, ":8080"); + TTest test = { + urlstr, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeAllowed | TFeature::FeaturesDefault | TFeature::FeatureNoRelPath, TState::ParsedBadHost, "", "", "", host, 8080, "", "", ""}; + TUri url; + URL_TEST(url, test); + } + } Y_UNIT_TEST(test11) { - { - TTest test = { - "HtTp://HoSt/%50aTh/?Query#Frag", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "http", "", "", "host", 80, "/PaTh/", "Query", "Frag"}; - TUri url; - URL_TEST(url, test); - } - - { - TTest test = { - "HtTp://HoSt/%50a%54h/?Query#Frag", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TFeature::FeatureToLower), TState::ParsedOK, "http", "", "", "host", 80, "/path/", "query", "frag"}; - TUri url; - URL_TEST(url, test); - } - } + { + TTest test = { + "HtTp://HoSt/%50aTh/?Query#Frag", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedOK, "http", "", "", "host", 80, "/PaTh/", "Query", "Frag"}; + TUri url; + URL_TEST(url, test); + } + + { + TTest test = { + "HtTp://HoSt/%50a%54h/?Query#Frag", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TFeature::FeatureToLower), TState::ParsedOK, "http", "", "", "host", 80, "/path/", "query", "frag"}; + TUri url; + URL_TEST(url, test); + } + } Y_UNIT_TEST(test12) { - // test characters which are not always safe - { + // test characters which are not always safe + { #define RAW "/:" #define DEC "%2F:" #define ENC "%2F%3A" - TTest test = { - "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); + TTest test = { + "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); #undef RAW #undef DEC #undef ENC - } - { + } + { #define RAW "?@" #define DEC "%3F@" #define ENC "%3F%40" - TTest test = { - "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); + TTest test = { + "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); #undef RAW #undef DEC #undef ENC - } - { + } + { #define RAW "%&;=" #define DEC "%25&;=" #define ENC "%25%26%3B%3D" - TTest test = { - "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, ENC, ENC}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC); + TTest test = { + "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, ENC, ENC}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC); #undef RAW #undef DEC #undef ENC - } - { + } + { #define RAW "!$'()*," #define DEC "!$%27()*," #define ENC "%21%24%27%28%29%2A%2C" - TTest test = { - "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, DEC, DEC}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" DEC "#" DEC); + TTest test = { + "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, DEC, DEC}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" DEC "#" DEC); #undef RAW #undef DEC #undef ENC - } - { + } + { #define DEC "Череповец。рф" #define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" // punycode corresponds to lowercase #define PNC "xn--b1afab7bff7cb.xn--p1ai" - TTest test = { - "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); + TTest test = { + "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); #undef PNC #undef DEC #undef ENC - } - { + } + { #define DEC "Череповец。рф" #define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" // punycode corresponds to lowercase #define PNC "xn--b1afab7bff7cb.xn--p1ai" - TTest test = { - "http://" DEC "/" DEC "?" DEC "#" DEC, TParseFlags(TFeature::FeaturesRobot | TFeature::FeatureEncodeExtendedASCII), TState::ParsedOK, "http", "", "", PNC, 80, "/" ENC, ENC, ENC}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" PNC "/" ENC "?" ENC "#" ENC); + TTest test = { + "http://" DEC "/" DEC "?" DEC "#" DEC, TParseFlags(TFeature::FeaturesRobot | TFeature::FeatureEncodeExtendedASCII), TState::ParsedOK, "http", "", "", PNC, 80, "/" ENC, ENC, ENC}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" PNC "/" ENC "?" ENC "#" ENC); #undef PNC #undef DEC #undef ENC - } - { + } + { #define DEC "независимая-экспертиза-оценка-ущерба-авто-дтп.рф" #define PNC "xn--------3veabbbbjgk5abecc3afsad2cg8bvq2alouolqf5brd3a4jzftgqd.xn--p1ai" - TTest test = { - "http://" DEC "/", TParseFlags(TFeature::FeaturesRobot), TState::ParsedOK, "http", "", "", PNC, 80, "/", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" PNC "/"); + TTest test = { + "http://" DEC "/", TParseFlags(TFeature::FeaturesRobot), TState::ParsedOK, "http", "", "", PNC, 80, "/", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" PNC "/"); #undef PNC #undef DEC - } - } + } + } Y_UNIT_TEST(testFlexibleAuthority) { - TUri uri; - UNIT_ASSERT_EQUAL(uri.Parse("http://hello_world", TFeature::FeatureCheckHost), TState::ParsedBadHost); - UNIT_ASSERT_EQUAL(uri.Parse("http://hello_world", TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), "hello_world"); - - UNIT_ASSERT_EQUAL(uri.Parse("httpzzzzz://)(*&^$!\\][';<>`~,q?./index.html", TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), ")(*&^$!\\][';<>`~,q"); - UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldPath), ""); - UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldQuery), "./index.html"); - - UNIT_ASSERT_EQUAL(uri.Parse("htttttttp://)(*&^%45$!\\][';<>`~,.q/index.html", TFeature::FeatureSchemeFlexible), TState::ParsedOK); - UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), ")(*&^e$!\\][';<>`~,.q"); - UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldPath), "/index.html"); - UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldQuery), ""); - } + TUri uri; + UNIT_ASSERT_EQUAL(uri.Parse("http://hello_world", TFeature::FeatureCheckHost), TState::ParsedBadHost); + UNIT_ASSERT_EQUAL(uri.Parse("http://hello_world", TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), "hello_world"); + + UNIT_ASSERT_EQUAL(uri.Parse("httpzzzzz://)(*&^$!\\][';<>`~,q?./index.html", TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), ")(*&^$!\\][';<>`~,q"); + UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldPath), ""); + UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldQuery), "./index.html"); + + UNIT_ASSERT_EQUAL(uri.Parse("htttttttp://)(*&^%45$!\\][';<>`~,.q/index.html", TFeature::FeatureSchemeFlexible), TState::ParsedOK); + UNIT_ASSERT_VALUES_EQUAL(uri.GetHost(), ")(*&^e$!\\][';<>`~,.q"); + UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldPath), "/index.html"); + UNIT_ASSERT_VALUES_EQUAL(uri.GetField(TField::FieldQuery), ""); + } Y_UNIT_TEST(testSpecialChar) { - // test characters which are not always allowed - { - TTest test = { - "http://host/pa th", TFeature::FeaturesAll | TFeature::FeatureEncodeSpace, TState::ParsedOK, "http", "", "", "host", 80, "/pa%20th", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%20th"); - } - { - TTest test = { - "http://host/pa th", TFeature::FeaturesAll, TState::ParsedBadFormat, "http", "", "", "host", 80, "/pa th", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa th"); - } - { - TTest test = { - "http://host/pa%th%41", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/pa%25thA", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%25thA"); - } - { - TTest test = { - "http://host/invalid_second_char%az%1G", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/invalid_second_char%25az%251G", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/invalid_second_char%25az%251G"); - } - { - TTest test = { - "http://host/border%2", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/border%252", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/border%252"); - } - { - TTest test = { - "http://host/pa%th%41", TFeature::FeaturesAll, TState::ParsedBadFormat, "http", "", "", "host", 80, "/pa%thA", "", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%thA"); - } - } + // test characters which are not always allowed + { + TTest test = { + "http://host/pa th", TFeature::FeaturesAll | TFeature::FeatureEncodeSpace, TState::ParsedOK, "http", "", "", "host", 80, "/pa%20th", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%20th"); + } + { + TTest test = { + "http://host/pa th", TFeature::FeaturesAll, TState::ParsedBadFormat, "http", "", "", "host", 80, "/pa th", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa th"); + } + { + TTest test = { + "http://host/pa%th%41", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/pa%25thA", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%25thA"); + } + { + TTest test = { + "http://host/invalid_second_char%az%1G", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/invalid_second_char%25az%251G", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/invalid_second_char%25az%251G"); + } + { + TTest test = { + "http://host/border%2", TFeature::FeaturesAll | TFeature::FeatureEncodePercent, TState::ParsedOK, "http", "", "", "host", 80, "/border%252", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/border%252"); + } + { + TTest test = { + "http://host/pa%th%41", TFeature::FeaturesAll, TState::ParsedBadFormat, "http", "", "", "host", 80, "/pa%thA", "", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%thA"); + } + } Y_UNIT_TEST(testIPv6) { - { + { #define RAW "[1080:0:0:0:8:800:200C:417A]" #define DEC "[1080:0:0:0:8:800:200c:417a]" - TTest test = { - "http://" RAW "/" RAW "?" RAW "#" RAW, TParseFlags(TFeature::FeaturesAll), TState::ParsedOK, "http", "", "", DEC, 80, "/" RAW, RAW, RAW}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" RAW "?" RAW "#" RAW); + TTest test = { + "http://" RAW "/" RAW "?" RAW "#" RAW, TParseFlags(TFeature::FeaturesAll), TState::ParsedOK, "http", "", "", DEC, 80, "/" RAW, RAW, RAW}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" RAW "?" RAW "#" RAW); #undef DEC #undef RAW - } - } + } + } Y_UNIT_TEST(testEscapedFragment) { - { - TTest test = { - "http://host.com#!a=b&c=d#e+g%41%25", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureHashBangToEscapedFragment), TState::ParsedOK, "http", "", "", "host.com", 80, "/", "_escaped_fragment_=a=b%26c=d%23e%2BgA%2525", ""}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host.com/?_escaped_fragment_=a=b%26c=d%23e%2BgA%2525"); - } - { - TTest test = { - "http://host.com?_escaped_fragment_=a=b%26c=d%23e%2bg%2525", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureEscapedToHashBangFragment), TState::ParsedOK, "http", "", "", "host.com", 80, "/", "", "!a=b&c=d#e+g%25"}; - TUri url; - URL_TEST(url, test); - UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host.com/#!a=b&c=d#e+g%25"); - } - } + { + TTest test = { + "http://host.com#!a=b&c=d#e+g%41%25", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureHashBangToEscapedFragment), TState::ParsedOK, "http", "", "", "host.com", 80, "/", "_escaped_fragment_=a=b%26c=d%23e%2BgA%2525", ""}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host.com/?_escaped_fragment_=a=b%26c=d%23e%2BgA%2525"); + } + { + TTest test = { + "http://host.com?_escaped_fragment_=a=b%26c=d%23e%2bg%2525", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureEscapedToHashBangFragment), TState::ParsedOK, "http", "", "", "host.com", 80, "/", "", "!a=b&c=d#e+g%25"}; + TUri url; + URL_TEST(url, test); + UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host.com/#!a=b&c=d#e+g%25"); + } + } Y_UNIT_TEST(testReEncode) { - { - TStringStream out; - TUri::ReEncode(out, "foo bar"); - UNIT_ASSERT_VALUES_EQUAL(out.Str(), "foo%20bar"); - } - } - - static const TStringBuf NonRfcUrls[] = { - "http://deshevle.ru/price/price=&SrchTp=1&clID=24&BL=SrchTp=0|clID=24&frmID=75&SortBy=P&PreSort=&NmDir=0&VndDir=0&PrDir=0&SPP=44", - "http://secure.rollerwarehouse.com/skates/aggressive/skates/c/11[03]/tx/$$$+11[03][a-z]", - "http://secure.rollerwarehouse.com/skates/aggressive/skates/tx/$$$+110[a-z]", - "http://translate.google.com/translate_t?langpair=en|ru", - "http://www.garnier.com.ru/_ru/_ru/our_products/products_trade.aspx?tpcode=OUR_PRODUCTS^PRD_BODYCARE^EXTRA_SKIN^EXTRA_SKIN_BENEFITS", - "http://www.km.ru/magazin/view_print.asp?id={1846295A-223B-41DC-9F51-90D5D6236C49}", - "http://www.manutd.com/default.sps?pagegid={78F24B85-702C-4DC8-A5D4-2F67252C28AA}&itype=12977&pagebuildpageid=2716&bg=1", - "http://www.pokupay.ru/price/price=&SrchTp=1&clID=24&BL=SrchTp=0|clID=24&frmID=75&SPP=35&SortBy=N&PreSort=V&NmDir=0&VndDir=1&PrDir=0", - "http://www.rodnoyspb.ru/rest/plager/page[0].html", - "http://www.trinity.by/?section_id=46,47,48&cat=1&filters[]=2^_^Sony", - "http://translate.yandex.net/api/v1/tr.json/translate?lang=en-ru&text=>", - nullptr}; + { + TStringStream out; + TUri::ReEncode(out, "foo bar"); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), "foo%20bar"); + } + } + + static const TStringBuf NonRfcUrls[] = { + "http://deshevle.ru/price/price=&SrchTp=1&clID=24&BL=SrchTp=0|clID=24&frmID=75&SortBy=P&PreSort=&NmDir=0&VndDir=0&PrDir=0&SPP=44", + "http://secure.rollerwarehouse.com/skates/aggressive/skates/c/11[03]/tx/$$$+11[03][a-z]", + "http://secure.rollerwarehouse.com/skates/aggressive/skates/tx/$$$+110[a-z]", + "http://translate.google.com/translate_t?langpair=en|ru", + "http://www.garnier.com.ru/_ru/_ru/our_products/products_trade.aspx?tpcode=OUR_PRODUCTS^PRD_BODYCARE^EXTRA_SKIN^EXTRA_SKIN_BENEFITS", + "http://www.km.ru/magazin/view_print.asp?id={1846295A-223B-41DC-9F51-90D5D6236C49}", + "http://www.manutd.com/default.sps?pagegid={78F24B85-702C-4DC8-A5D4-2F67252C28AA}&itype=12977&pagebuildpageid=2716&bg=1", + "http://www.pokupay.ru/price/price=&SrchTp=1&clID=24&BL=SrchTp=0|clID=24&frmID=75&SPP=35&SortBy=N&PreSort=V&NmDir=0&VndDir=1&PrDir=0", + "http://www.rodnoyspb.ru/rest/plager/page[0].html", + "http://www.trinity.by/?section_id=46,47,48&cat=1&filters[]=2^_^Sony", + "http://translate.yandex.net/api/v1/tr.json/translate?lang=en-ru&text=>", + nullptr}; Y_UNIT_TEST(test_NonRfcUrls) { - TUri url; - const long flags = TFeature::FeaturesRobot; - for (size_t i = 0;; ++i) { - const TStringBuf& buf = NonRfcUrls[i]; - if (!buf.IsInited()) - break; - UNIT_ASSERT_VALUES_EQUAL(TState::ParsedOK, url.Parse(buf, flags)); - } - } - - static const TStringBuf CheckParseException[] = { - "http://www.'>'.com/?.net/", - nullptr}; + TUri url; + const long flags = TFeature::FeaturesRobot; + for (size_t i = 0;; ++i) { + const TStringBuf& buf = NonRfcUrls[i]; + if (!buf.IsInited()) + break; + UNIT_ASSERT_VALUES_EQUAL(TState::ParsedOK, url.Parse(buf, flags)); + } + } + + static const TStringBuf CheckParseException[] = { + "http://www.'>'.com/?.net/", + nullptr}; Y_UNIT_TEST(test_CheckParseException) { - TUri url; - const long flags = TFeature::FeaturesRobot | TFeature::FeaturesEncode; - for (size_t i = 0;; ++i) { - const TStringBuf& buf = CheckParseException[i]; - if (!buf.IsInited()) - break; - TString what; - try { - // we care only about exceptions, not whether it parses correctly - url.Parse(buf, flags); - continue; - } catch (const std::exception& exc) { - what = exc.what(); - } catch (...) { - what = "exception thrown"; - } - ythrow yexception() << "failed to parse URL [" << buf << "]: " << what; - } + TUri url; + const long flags = TFeature::FeaturesRobot | TFeature::FeaturesEncode; + for (size_t i = 0;; ++i) { + const TStringBuf& buf = CheckParseException[i]; + if (!buf.IsInited()) + break; + TString what; + try { + // we care only about exceptions, not whether it parses correctly + url.Parse(buf, flags); + continue; + } catch (const std::exception& exc) { + what = exc.what(); + } catch (...) { + what = "exception thrown"; + } + ythrow yexception() << "failed to parse URL [" << buf << "]: " << what; + } } Y_UNIT_TEST(test_PrintPort) { - TUri uri; - { - uri.Parse("http://srv.net:9100/print", TFeature::FeaturesRecommended); - TString s = uri.PrintS(TUri::FlagPort); - Cdbg << uri.PrintS() << ',' << uri.PrintS(TUri::FlagPort) << Endl; - UNIT_ASSERT_VALUES_EQUAL(9100, FromString<ui32>(s)); - } - { - uri.Parse("http://srv.net:80/print", TFeature::FeaturesRecommended); - TString s = uri.PrintS(TUri::FlagPort); - Cdbg << uri.PrintS() << ',' << uri.PrintS(TUri::FlagPort) << Endl; - UNIT_ASSERT(s.Empty()); - } - } - + TUri uri; + { + uri.Parse("http://srv.net:9100/print", TFeature::FeaturesRecommended); + TString s = uri.PrintS(TUri::FlagPort); + Cdbg << uri.PrintS() << ',' << uri.PrintS(TUri::FlagPort) << Endl; + UNIT_ASSERT_VALUES_EQUAL(9100, FromString<ui32>(s)); + } + { + uri.Parse("http://srv.net:80/print", TFeature::FeaturesRecommended); + TString s = uri.PrintS(TUri::FlagPort); + Cdbg << uri.PrintS() << ',' << uri.PrintS(TUri::FlagPort) << Endl; + UNIT_ASSERT(s.Empty()); + } + } + Y_UNIT_TEST(test_ParseFailures) { - { - TTest test = { - "http://host:port", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "http://javascript:alert(hi)", TFeature::FeaturesRobot, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "http://host::0", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "http://host ", TFeature::FeaturesAll, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "http:00..03", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "host:00..03", TFeature::FeaturesAll, TState::ParsedRootless, "host", "", "", "", 0, "", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "http://roduct;isbn,0307371549;at,aid4c00179ab018www.mcnamarasband.wordpress.com/", TFeature::FeaturesAll, TState::ParsedBadHost, "http", "", "", "roduct;isbn,0307371549;at,aid4c00179ab018www.mcnamarasband.wordpress.com", 80, "/", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - { - TTest test = { - "invalid url", TFeature::FeaturesDefault, TState::ParsedBadFormat, "", "", "", "", 0, "invalid url", "", ""}; - TUri url(-1); - URL_TEST(url, test); - } - } + { + TTest test = { + "http://host:port", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "http://javascript:alert(hi)", TFeature::FeaturesRobot, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "http://host::0", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "http://host ", TFeature::FeaturesAll, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "http:00..03", TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TState::ParsedBadFormat, "", "", "", "", Max<ui16>(), "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "host:00..03", TFeature::FeaturesAll, TState::ParsedRootless, "host", "", "", "", 0, "", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "http://roduct;isbn,0307371549;at,aid4c00179ab018www.mcnamarasband.wordpress.com/", TFeature::FeaturesAll, TState::ParsedBadHost, "http", "", "", "roduct;isbn,0307371549;at,aid4c00179ab018www.mcnamarasband.wordpress.com", 80, "/", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + { + TTest test = { + "invalid url", TFeature::FeaturesDefault, TState::ParsedBadFormat, "", "", "", "", 0, "invalid url", "", ""}; + TUri url(-1); + URL_TEST(url, test); + } + } Y_UNIT_TEST(test_scheme_related_url) { TUri url; UNIT_ASSERT_VALUES_EQUAL(url.Parse("//www.hostname.ru/path", TFeature::FeaturesRobot), TState::ParsedOK); @@ -903,120 +903,120 @@ namespace NUri { Y_UNIT_TEST_SUITE(TInvertDomainTest) { Y_UNIT_TEST(TestInvert) { - TString a; - UNIT_ASSERT_EQUAL(InvertDomain(a), ""); - TString aa(".:/foo"); - UNIT_ASSERT_EQUAL(InvertDomain(aa), ".:/foo"); - TString aaa("/foo.bar:"); - UNIT_ASSERT_EQUAL(InvertDomain(aaa), "/foo.bar:"); - TString b("ru"); - UNIT_ASSERT_EQUAL(InvertDomain(b), "ru"); - TString c(".ru"); - UNIT_ASSERT_EQUAL(InvertDomain(c), "ru."); - TString d("ru."); - UNIT_ASSERT_EQUAL(InvertDomain(d), ".ru"); - TString e("www.yandex.ru:80/yandsearch?text=foo"); - UNIT_ASSERT_EQUAL(InvertDomain(e), "ru.yandex.www:80/yandsearch?text=foo"); - TString f("www.yandex.ru:80/yandsearch?text=foo"); - InvertDomain(f.begin(), f.begin() + 10); - UNIT_ASSERT_EQUAL(f, "yandex.www.ru:80/yandsearch?text=foo"); - TString g("https://www.yandex.ru:80//"); - UNIT_ASSERT_EQUAL(InvertDomain(g), "https://ru.yandex.www:80//"); - TString h("www.yandex.ru:8080/redir.pl?url=https://google.com/"); - UNIT_ASSERT_EQUAL(InvertDomain(h), "ru.yandex.www:8080/redir.pl?url=https://google.com/"); - } + TString a; + UNIT_ASSERT_EQUAL(InvertDomain(a), ""); + TString aa(".:/foo"); + UNIT_ASSERT_EQUAL(InvertDomain(aa), ".:/foo"); + TString aaa("/foo.bar:"); + UNIT_ASSERT_EQUAL(InvertDomain(aaa), "/foo.bar:"); + TString b("ru"); + UNIT_ASSERT_EQUAL(InvertDomain(b), "ru"); + TString c(".ru"); + UNIT_ASSERT_EQUAL(InvertDomain(c), "ru."); + TString d("ru."); + UNIT_ASSERT_EQUAL(InvertDomain(d), ".ru"); + TString e("www.yandex.ru:80/yandsearch?text=foo"); + UNIT_ASSERT_EQUAL(InvertDomain(e), "ru.yandex.www:80/yandsearch?text=foo"); + TString f("www.yandex.ru:80/yandsearch?text=foo"); + InvertDomain(f.begin(), f.begin() + 10); + UNIT_ASSERT_EQUAL(f, "yandex.www.ru:80/yandsearch?text=foo"); + TString g("https://www.yandex.ru:80//"); + UNIT_ASSERT_EQUAL(InvertDomain(g), "https://ru.yandex.www:80//"); + TString h("www.yandex.ru:8080/redir.pl?url=https://google.com/"); + UNIT_ASSERT_EQUAL(InvertDomain(h), "ru.yandex.www:8080/redir.pl?url=https://google.com/"); + } } - TQueryArg::EProcessed ProcessQargs(TString url, TString& processed, TQueryArgFilter filter = 0, void* filterData = 0) { - TUri uri; - uri.Parse(url, NUri::TFeature::FeaturesRecommended); + TQueryArg::EProcessed ProcessQargs(TString url, TString& processed, TQueryArgFilter filter = 0, void* filterData = 0) { + TUri uri; + uri.Parse(url, NUri::TFeature::FeaturesRecommended); - TQueryArgProcessing processing(TQueryArg::FeatureSortByName | (filter ? TQueryArg::FeatureFilter : 0) | TQueryArg::FeatureRewriteDirty, filter, filterData); - auto result = processing.Process(uri); - processed = uri.PrintS(); - return result; - } + TQueryArgProcessing processing(TQueryArg::FeatureSortByName | (filter ? TQueryArg::FeatureFilter : 0) | TQueryArg::FeatureRewriteDirty, filter, filterData); + auto result = processing.Process(uri); + processed = uri.PrintS(); + return result; + } - TString SortQargs(TString url) { - TString r; - ProcessQargs(url, r); - return r; + TString SortQargs(TString url) { + TString r; + ProcessQargs(url, r); + return r; } - bool QueryArgsFilter(const TQueryArg& arg, void* filterData) { - const char* skipName = static_cast<const char*>(filterData); - return arg.Name != skipName; - } + bool QueryArgsFilter(const TQueryArg& arg, void* filterData) { + const char* skipName = static_cast<const char*>(filterData); + return arg.Name != skipName; + } - TString FilterQargs(TString url, const char* name) { - TString r; - ProcessQargs(url, r, &QueryArgsFilter, const_cast<char*>(name)); - return r; - } + TString FilterQargs(TString url, const char* name) { + TString r; + ProcessQargs(url, r, &QueryArgsFilter, const_cast<char*>(name)); + return r; + } Y_UNIT_TEST_SUITE(QargsTest) { Y_UNIT_TEST(TestSorting) { - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/"), "http://ya.ru/"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?"), "http://ya.ru/?"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?some=value"), "http://ya.ru/?some=value"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=2"), "http://ya.ru/?a=2&b=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=2&a=3"), "http://ya.ru/?a=3&a=2&b=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/"), "http://ya.ru/"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?"), "http://ya.ru/?"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?some=value"), "http://ya.ru/?some=value"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=2"), "http://ya.ru/?a=2&b=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=2&a=3"), "http://ya.ru/?a=3&a=2&b=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?aaa=3&b=b&a=1&aa=2"), "http://ya.ru/?a=1&aa=2&aaa=3&b=b"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?aaa=3&b=b&a=1&aa=2"), "http://ya.ru/?a=1&aa=2&aaa=3&b=b"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?a=1&b=1&c=1"), "http://ya.ru/?a=1&b=1&c=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=1&c=1"), "http://ya.ru/?a=1&b=1&c=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?c=1&a=1&b=1"), "http://ya.ru/?a=1&b=1&c=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?a=1&b=1&c=1"), "http://ya.ru/?a=1&b=1&c=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b=1&a=1&c=1"), "http://ya.ru/?a=1&b=1&c=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?c=1&a=1&b=1"), "http://ya.ru/?a=1&b=1&c=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?c=1&a=1&a=1&b=1&c=1&b=1"), "http://ya.ru/?a=1&a=1&b=1&b=1&c=1&c=1"); + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?c=1&a=1&a=1&b=1&c=1&b=1"), "http://ya.ru/?a=1&a=1&b=1&b=1&c=1&c=1"); - UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b==&a=&&c="), "http://ya.ru/?a=&b==&c="); - } + UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?b==&a=&&c="), "http://ya.ru/?a=&b==&c="); + } Y_UNIT_TEST(TestParsingCorners) { - TString s; + TString s; UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?=", s), TQueryArg::ProcessedOK); UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some", s), TQueryArg::ProcessedOK); UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some=", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?&", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?&&", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some=", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some==", s), TQueryArg::ProcessedOK); - UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some=&&", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?&", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?&&", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some=", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some==", s), TQueryArg::ProcessedOK); + UNIT_ASSERT_EQUAL(ProcessQargs("http://ya.ru/?some=&&", s), TQueryArg::ProcessedOK); UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?="), "http://ya.ru/?="); UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?some=="), "http://ya.ru/?some=="); UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?&&"), "http://ya.ru/?&&"); UNIT_ASSERT_STRINGS_EQUAL(SortQargs("http://ya.ru/?a"), "http://ya.ru/?a"); - } + } Y_UNIT_TEST(TestFiltering) { - UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?some=value", "missing"), "http://ya.ru/?some=value"); - UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?b=1&a=2", "b"), "http://ya.ru/?a=2"); - UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?b=1&a=2&a=3", "a"), "http://ya.ru/?b=1"); - UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?some=&another=", "another"), "http://ya.ru/?some="); - } + UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?some=value", "missing"), "http://ya.ru/?some=value"); + UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?b=1&a=2", "b"), "http://ya.ru/?a=2"); + UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?b=1&a=2&a=3", "a"), "http://ya.ru/?b=1"); + UNIT_ASSERT_STRINGS_EQUAL(FilterQargs("http://ya.ru/?some=&another=", "another"), "http://ya.ru/?some="); + } Y_UNIT_TEST(TestRemoveEmptyFeature) { - TUri uri; - uri.Parse("http://ya.ru/?", NUri::TFeature::FeaturesRecommended); + TUri uri; + uri.Parse("http://ya.ru/?", NUri::TFeature::FeaturesRecommended); - TQueryArgProcessing processing(TQueryArg::FeatureRemoveEmptyQuery | TQueryArg::FeatureRewriteDirty); - auto result = processing.Process(uri); - UNIT_ASSERT_EQUAL(result, TQueryArg::ProcessedOK); - UNIT_ASSERT_STRINGS_EQUAL(uri.PrintS(), "http://ya.ru/"); - } + TQueryArgProcessing processing(TQueryArg::FeatureRemoveEmptyQuery | TQueryArg::FeatureRewriteDirty); + auto result = processing.Process(uri); + UNIT_ASSERT_EQUAL(result, TQueryArg::ProcessedOK); + UNIT_ASSERT_STRINGS_EQUAL(uri.PrintS(), "http://ya.ru/"); + } Y_UNIT_TEST(TestNoRemoveEmptyFeature) { - TUri uri; - uri.Parse("http://ya.ru/?", NUri::TFeature::FeaturesRecommended); - - TQueryArgProcessing processing(0); - auto result = processing.Process(uri); - UNIT_ASSERT_EQUAL(result, TQueryArg::ProcessedOK); - UNIT_ASSERT_STRINGS_EQUAL(uri.PrintS(), "http://ya.ru/?"); - } - } + TUri uri; + uri.Parse("http://ya.ru/?", NUri::TFeature::FeaturesRecommended); + + TQueryArgProcessing processing(0); + auto result = processing.Process(uri); + UNIT_ASSERT_EQUAL(result, TQueryArg::ProcessedOK); + UNIT_ASSERT_STRINGS_EQUAL(uri.PrintS(), "http://ya.ru/?"); + } + } } diff --git a/library/cpp/uri/uri_ut.h b/library/cpp/uri/uri_ut.h index 699f2d1581..f8ac6e4092 100644 --- a/library/cpp/uri/uri_ut.h +++ b/library/cpp/uri/uri_ut.h @@ -5,19 +5,19 @@ #include <library/cpp/testing/unittest/registar.h> namespace NUri { - struct TTest { - TStringBuf Val; - TParseFlags Flags; - TState::EParsed State; - TStringBuf Scheme; - TStringBuf User; - TStringBuf Pass; - TStringBuf Host; - ui16 Port; - TStringBuf Path; - TStringBuf Query; - TStringBuf Frag; - }; + struct TTest { + TStringBuf Val; + TParseFlags Flags; + TState::EParsed State; + TStringBuf Scheme; + TStringBuf User; + TStringBuf Pass; + TStringBuf Host; + ui16 Port; + TStringBuf Path; + TStringBuf Query; + TStringBuf Frag; + }; } @@ -29,53 +29,53 @@ namespace NUri { UNIT_ASSERT_UNEQUAL_C(url, url2, URL_MSG(url1, url2, "==")) #define CMP_FLD(url, test, fld) \ - UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::Field##fld), test.fld) + UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::Field##fld), test.fld) -#define CMP_URL(url, test) \ - do { \ - CMP_FLD(url, test, Scheme); \ - CMP_FLD(url, test, User); \ - CMP_FLD(url, test, Pass); \ - CMP_FLD(url, test, Host); \ - UNIT_ASSERT_VALUES_EQUAL(url.GetPort(), test.Port); \ - CMP_FLD(url, test, Path); \ - CMP_FLD(url, test, Query); \ - CMP_FLD(url, test, Frag); \ - } while (false) +#define CMP_URL(url, test) \ + do { \ + CMP_FLD(url, test, Scheme); \ + CMP_FLD(url, test, User); \ + CMP_FLD(url, test, Pass); \ + CMP_FLD(url, test, Host); \ + UNIT_ASSERT_VALUES_EQUAL(url.GetPort(), test.Port); \ + CMP_FLD(url, test, Path); \ + CMP_FLD(url, test, Query); \ + CMP_FLD(url, test, Frag); \ + } while (false) -#define URL_TEST_ENC(url, test, enc) \ - do { \ - TState::EParsed st = url.ParseUri(test.Val, test.Flags, 0, enc); \ - UNIT_ASSERT_VALUES_EQUAL(st, test.State); \ - CMP_URL(url, test); \ - if (TState::ParsedOK != st) \ - break; \ - TUri _url; \ - TString urlstr, urlstr2; \ - urlstr = url.PrintS(); \ - TState::EParsed st2 = _url.ParseUri(urlstr, \ - (test.Flags & ~TFeature::FeatureNoRelPath) | TFeature::FeatureAllowRootless, 0, enc); \ - if (TState::ParsedEmpty != st2) \ - UNIT_ASSERT_VALUES_EQUAL(st2, test.State); \ - urlstr2 = _url.PrintS(); \ - UNIT_ASSERT_VALUES_EQUAL(urlstr, urlstr2); \ - CMP_URL(_url, test); \ - UNIT_ASSERT_VALUES_EQUAL(url.GetUrlFieldMask(), _url.GetUrlFieldMask()); \ - URL_EQ(url, _url); \ - const TStringBuf hostascii = url.GetField(TField::FieldHostAscii); \ - if (hostascii.Empty()) \ - break; \ - urlstr = url.PrintS(TField::FlagHostAscii); \ - st2 = _url.ParseUri(urlstr, \ - (test.Flags & ~TFeature::FeatureNoRelPath) | TFeature::FeatureAllowRootless, 0, enc); \ - UNIT_ASSERT_VALUES_EQUAL(st2, test.State); \ - urlstr2 = _url.PrintS(); \ - UNIT_ASSERT_VALUES_EQUAL(urlstr, urlstr2); \ - TTest test2 = test; \ - test2.Host = hostascii; \ - CMP_URL(_url, test2); \ - UNIT_ASSERT_VALUES_EQUAL(url.GetUrlFieldMask(), _url.GetUrlFieldMask()); \ - } while (false) +#define URL_TEST_ENC(url, test, enc) \ + do { \ + TState::EParsed st = url.ParseUri(test.Val, test.Flags, 0, enc); \ + UNIT_ASSERT_VALUES_EQUAL(st, test.State); \ + CMP_URL(url, test); \ + if (TState::ParsedOK != st) \ + break; \ + TUri _url; \ + TString urlstr, urlstr2; \ + urlstr = url.PrintS(); \ + TState::EParsed st2 = _url.ParseUri(urlstr, \ + (test.Flags & ~TFeature::FeatureNoRelPath) | TFeature::FeatureAllowRootless, 0, enc); \ + if (TState::ParsedEmpty != st2) \ + UNIT_ASSERT_VALUES_EQUAL(st2, test.State); \ + urlstr2 = _url.PrintS(); \ + UNIT_ASSERT_VALUES_EQUAL(urlstr, urlstr2); \ + CMP_URL(_url, test); \ + UNIT_ASSERT_VALUES_EQUAL(url.GetUrlFieldMask(), _url.GetUrlFieldMask()); \ + URL_EQ(url, _url); \ + const TStringBuf hostascii = url.GetField(TField::FieldHostAscii); \ + if (hostascii.Empty()) \ + break; \ + urlstr = url.PrintS(TField::FlagHostAscii); \ + st2 = _url.ParseUri(urlstr, \ + (test.Flags & ~TFeature::FeatureNoRelPath) | TFeature::FeatureAllowRootless, 0, enc); \ + UNIT_ASSERT_VALUES_EQUAL(st2, test.State); \ + urlstr2 = _url.PrintS(); \ + UNIT_ASSERT_VALUES_EQUAL(urlstr, urlstr2); \ + TTest test2 = test; \ + test2.Host = hostascii; \ + CMP_URL(_url, test2); \ + UNIT_ASSERT_VALUES_EQUAL(url.GetUrlFieldMask(), _url.GetUrlFieldMask()); \ + } while (false) #define URL_TEST(url, test) \ URL_TEST_ENC(url, test, CODES_UTF8) diff --git a/library/cpp/uri/ut/ya.make b/library/cpp/uri/ut/ya.make index 14cc4e01d0..b2b2c1291a 100644 --- a/library/cpp/uri/ut/ya.make +++ b/library/cpp/uri/ut/ya.make @@ -2,10 +2,10 @@ UNITTEST_FOR(library/cpp/uri) OWNER(leo) -NO_OPTIMIZE() +NO_OPTIMIZE() NO_WSHADOW() - + PEERDIR( library/cpp/html/entity ) diff --git a/library/cpp/uri/ya.make b/library/cpp/uri/ya.make index 8cd5ef25de..8fc808a6af 100644 --- a/library/cpp/uri/ya.make +++ b/library/cpp/uri/ya.make @@ -1,10 +1,10 @@ -LIBRARY() +LIBRARY() OWNER( mvel g:base ) - + SRCS( assign.cpp common.cpp @@ -15,8 +15,8 @@ SRCS( parse.cpp qargs.cpp uri.cpp - encodefsm.rl6 - parsefsm.rl6 + encodefsm.rl6 + parsefsm.rl6 ) PEERDIR( |