diff options
author | albert <albert@yandex-team.ru> | 2022-02-10 16:48:14 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:14 +0300 |
commit | 9f25ef3232c288ca664ceee6c376cf64e4349a2e (patch) | |
tree | b192eaf3150845f7302fafd460a972b0439d6fe5 /library/cpp/uri | |
parent | 6a1e535429145ec1ecfbc5f1efd3c95323261fb5 (diff) | |
download | ydb-9f25ef3232c288ca664ceee6c376cf64e4349a2e.tar.gz |
Restoring authorship annotation for <albert@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/uri')
-rw-r--r-- | library/cpp/uri/assign.cpp | 160 | ||||
-rw-r--r-- | library/cpp/uri/common.cpp | 20 | ||||
-rw-r--r-- | library/cpp/uri/common.h | 174 | ||||
-rw-r--r-- | library/cpp/uri/encode.cpp | 110 | ||||
-rw-r--r-- | library/cpp/uri/encode.h | 50 | ||||
-rw-r--r-- | library/cpp/uri/encodefsm.rl6 | 46 | ||||
-rw-r--r-- | library/cpp/uri/http_url.h | 100 | ||||
-rw-r--r-- | library/cpp/uri/other.cpp | 6 | ||||
-rw-r--r-- | library/cpp/uri/parse.cpp | 70 | ||||
-rw-r--r-- | library/cpp/uri/parse.h | 136 | ||||
-rw-r--r-- | library/cpp/uri/parsefsm.rl6 | 870 | ||||
-rw-r--r-- | library/cpp/uri/uri-ru_ut.cpp | 24 | ||||
-rw-r--r-- | library/cpp/uri/uri.cpp | 114 | ||||
-rw-r--r-- | library/cpp/uri/uri.h | 182 | ||||
-rw-r--r-- | library/cpp/uri/uri_ut.cpp | 162 | ||||
-rw-r--r-- | library/cpp/uri/uri_ut.h | 42 | ||||
-rw-r--r-- | library/cpp/uri/ya.make | 30 |
17 files changed, 1148 insertions, 1148 deletions
diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp index ae9125c727..ff29afc10d 100644 --- a/library/cpp/uri/assign.cpp +++ b/library/cpp/uri/assign.cpp @@ -1,16 +1,16 @@ -#include "uri.h" -#include "parse.h" - -#include <contrib/libs/libidn/idna.h> +#include "uri.h" +#include "parse.h" +#include <contrib/libs/libidn/idna.h> + #include <library/cpp/charset/recyr.hh> -#include <util/charset/wide.h> -#include <util/memory/tempbuf.h> -#include <util/string/cast.h> -#include <util/system/yassert.h> +#include <util/charset/wide.h> +#include <util/memory/tempbuf.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> #include <util/system/sys_alloc.h> -namespace NUri { +namespace NUri { TMallocPtr<char> TUri::IDNToAscii(const wchar32* idna) { // XXX: don't use punycode_encode directly as it doesn't include // proper stringprep and splitting on dot-equivalent characters @@ -24,21 +24,21 @@ namespace NUri { TMallocPtr<char> TUri::IDNToAscii(const TStringBuf& host, ECharset enc) { TTempBuf buf(sizeof(wchar32) * (1 + host.length())); wchar32* wbuf = reinterpret_cast<wchar32*>(buf.Data()); - + const size_t written = NDetail::NBaseOps::Recode(host, wbuf, enc).length(); wbuf[written] = 0; - + return IDNToAscii(wbuf); } - + TStringBuf TUri::HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc) { TStringBuf outhost; // store the result here before returning it, to get RVO - + size_t buflen = 0; - + if (hasExtended && !allowIDN) return outhost; // definitely can't convert - + // charset-recode: RFC 3986, 3.2.2, requires percent-encoded non-ASCII // chars in reg-name to be UTF-8 so convert to UTF-8 prior to decoding const bool recoding = CODES_UTF8 != enc && hasExtended; @@ -50,7 +50,7 @@ namespace NUri { return outhost; host = TStringBuf(buf.Get(), nwr); } - + // percent-decode if (0 == buflen) { buflen = host.length(); @@ -64,22 +64,22 @@ namespace NUri { // check again if (hasExtended && !allowIDN) - return outhost; - + return outhost; + host = out.Str(); - + // convert to punycode if needed if (!hasExtended) { outhost = host; return outhost; } - + TMallocPtr<char> puny; try { puny = IDNToAscii(host); } catch (const yexception& /* exc */) { } - + if (!puny) { // XXX: try user charset unless UTF8 or converted to it if (CODES_UTF8 == enc || recoding) @@ -92,50 +92,50 @@ namespace NUri { if (!puny) return outhost; } - + buf = puny; outhost = buf.Get(); - + return outhost; - } - + } + TStringBuf TUri::HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc) { // find what we have long haveFlags = 0; for (size_t i = 0; i != host.length(); ++i) haveFlags |= TEncoder::GetFlags(host[i]).FeatFlags; - + // interested in encoded characters or (if IDN is allowed) extended ascii TStringBuf outhost; const bool haveExtended = haveFlags & FeatureEncodeExtendedASCII; - + if (!haveExtended || allowIDN) { if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) outhost = host; else outhost = HostToAscii(host, buf, haveExtended, allowIDN, enc); } - + return outhost; } - + static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField fld, const TStringBuf& val, long flags) { if (val.empty()) return false; if (flags & TFeature::FeaturesAllEncoder) TUri::ReEncodeField(out, val, fld, flags); - else + else out << val; return true; - } - + } + TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defscheme) { Clear(); - + TState::EParsed ret = parser.State; if (ParsedBadFormat <= ret) return ret; - + const TSection& scheme = parser.Get(FieldScheme); const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme); @@ -154,9 +154,9 @@ namespace NUri { long flags = parser.Flags.Allow; if (convertIDN) flags |= FeatureAllowHostIDN | FeatureCheckHost; - + // process non-ASCII host for punycode - + TMallocPtr<char> hostptr; TStringBuf hostascii; // empty: use host field; non-empty: ascii bool hostConverted = false; // hostascii is empty or the original @@ -164,13 +164,13 @@ namespace NUri { if (host.IsSet() && !FldIsSet(FieldHost)) { const bool allowIDN = (flags & FeatureAllowHostIDN); const TStringBuf hostbuf = host.Get(); - + // if we know we have and allow extended-ASCII chars, no need to check further if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) hostascii = HostToAscii(hostbuf, hostptr, true, true, parser.Enc); else hostascii = HostToAscii(hostbuf, hostptr, allowIDN, parser.Enc); - + if (hostascii.empty()) ret = ParsedBadHost; // exists but cannot be converted else if (hostbuf.data() != hostascii.data()) { @@ -180,21 +180,21 @@ namespace NUri { FldMarkSet(FieldHost); // so that we don't process host below } } - + // add unprocessed fields - + for (int idx = 0; idx < FieldUrlMAX; ++idx) { const EField fld = EField(idx); const TSection& section = parser.Get(fld); if (section.IsSet() && !FldIsSet(fld)) buflen += 1 + section.EncodedLen(); // includes null - } + } if (0 == buflen) // no more sections set? return ret; - + // process #! fragments // https://developers.google.com/webmasters/ajax-crawling/docs/specification - + static const TStringBuf escFragPrefix(TStringBuf("_escaped_fragment_=")); bool encHashBangFrag = false; @@ -203,7 +203,7 @@ namespace NUri { do { if (FldIsSet(FieldFrag) || FldIsSet(FieldQuery)) break; - + const TSection& frag = parser.Get(FieldFrag); if (frag.IsSet()) { if (0 == (parser.Flags & FeatureHashBangToEscapedFragment)) @@ -232,33 +232,33 @@ namespace NUri { buflen -= escFragPrefix.length(); } } while (false); - + // now set all fields prior to validating - + Alloc(buflen); - + TMemoryWriteBuffer out(Buffer.data(), Buffer.size()); for (int idx = 0; idx < FieldUrlMAX; ++idx) { const EField fld = EField(idx); - + const TSection& section = parser.Get(fld); if (!section.IsSet() || FldIsSet(fld)) continue; - + if (FieldQuery == fld && encHashBangFrag) continue; - + if (FieldFrag == fld && qryEscapedFragment.IsInited()) continue; char* beg = out.Buf(); TStringBuf val = section.Get(); long careFlags = section.GetFlagsEncode(); - + switch (fld) { default: break; - + case FieldQuery: if (qryEscapedFragment.IsInited()) { const EField dstfld = FieldFrag; // that's where we will store @@ -273,7 +273,7 @@ namespace NUri { val = qryBeforeEscapedFragment; } break; - + case FieldFrag: if (encHashBangFrag) { const EField dstfld = FieldQuery; // that's where we will store @@ -289,7 +289,7 @@ namespace NUri { } break; } - + AppendField(out, fld, val, careFlags); char* end = out.Buf(); @@ -300,7 +300,7 @@ namespace NUri { Y_ASSERT(beg >= out.Beg()); out.SetPos(end); } - + FldSetNoDirty(fld, TStringBuf(beg, end)); // special character case @@ -309,7 +309,7 @@ namespace NUri { const long allowChars = parser.GetFieldFlags(fld) & checkChars; if (checkChars != allowChars) ret = ParsedBadFormat; - } + } out << '\0'; } @@ -324,28 +324,28 @@ namespace NUri { } Buffer.Resize(out.Len()); - + if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) { if (SchemeUnknown == defscheme) ret = ParsedBadScheme; else SetSchemeImpl(defscheme); } - + if (0 == (parser.Flags & FeatureAllowEmptyPath)) CheckMissingFields(); - + const TStringBuf& port = GetField(FieldPort); if (!port.empty()) { if (!TryFromString<ui16>(port, Port)) ret = ParsedBadPort; - } + } if (ParsedOK != ret) return ret; - + // run validity checks now that all fields are set - + // check the host for DNS compliance do { if (0 == (flags & FeatureCheckHost)) @@ -363,28 +363,28 @@ namespace NUri { } while (false); return ret; - } - + } + TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { Clear(); - + if (url.empty()) return ParsedEmpty; if (maxlen > 0 && url.length() > maxlen) return ParsedTooLong; - + const TParser parser(flags, url, enc); - + return AssignImpl(parser, defscheme); } TState::EParsed TUri::Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& url_base, ui32 maxlen, ECharset enc) { const TParseFlags flags1 = flags.Exclude(FeatureNoRelPath); TState::EParsed ret = ParseImpl(url, url_base.empty() ? flags : flags1, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; - + if (ParsedOK != ret) + return ret; + if (!url_base.empty() && !IsValidAbs()) { TUri base; ret = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc); @@ -394,19 +394,19 @@ namespace NUri { } Rewrite(); - return ret; + return ret; } - + TState::EParsed TUri::Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags, ui32 maxlen, ECharset enc) { const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; - + if (ParsedOK != ret) + return ret; + if (!IsValidAbs()) Merge(base, PathOperationFlag(flags)); - + Rewrite(); - return ret; + return ret; } TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { @@ -414,12 +414,12 @@ namespace NUri { url, flags | FeatureNoRelPath, maxlen, defscheme, enc); if (ParsedOK != ret) return ret; - + if (IsNull(FlagHost)) return ParsedBadHost; Rewrite(); return ParsedOK; } - -} + +} diff --git a/library/cpp/uri/common.cpp b/library/cpp/uri/common.cpp index 05af1e57d1..3f91c34cad 100644 --- a/library/cpp/uri/common.cpp +++ b/library/cpp/uri/common.cpp @@ -1,11 +1,11 @@ -#include "common.h" +#include "common.h" -#include <util/generic/map.h> +#include <util/generic/map.h> #include <util/generic/singleton.h> -namespace NUri { +namespace NUri { static_assert(TFeature::FeatureMAX <= sizeof(unsigned long) * 8, "expect TFeature::FeatureMAX <= sizeof(unsigned long) * 8"); - + const TSchemeInfo TSchemeInfo::Registry[] = { TSchemeInfo(TScheme::SchemeEmpty, TStringBuf()), // scheme is empty and inited TSchemeInfo(TScheme::SchemeHTTP, TStringBuf("http"), TField::FlagHost | TField::FlagPath, 80), @@ -17,18 +17,18 @@ namespace NUri { // add above TSchemeInfo(TScheme::SchemeUnknown, TStringBuf()) // scheme is empty and uninited }; - + namespace { struct TLessNoCase { bool operator()(const TStringBuf& lt, const TStringBuf& rt) const { return 0 > CompareNoCase(lt, rt); } }; - + class TSchemeInfoMap { typedef TMap<TStringBuf, TScheme::EKind, TLessNoCase> TdMap; TdMap Map_; - + public: TSchemeInfoMap() { for (int i = TScheme::SchemeEmpty; i < TScheme::SchemeUnknown; ++i) { @@ -36,7 +36,7 @@ namespace NUri { Map_.insert(std::make_pair(info.Str, info.Kind)); } } - + TScheme::EKind Get(const TStringBuf& scheme) const { const TdMap::const_iterator it = Map_.find(scheme); return Map_.end() == it ? TScheme::SchemeUnknown : it->second; @@ -51,7 +51,7 @@ namespace NUri { const TSchemeInfo& TSchemeInfo::Get(const TStringBuf& scheme) { return Registry[TSchemeInfoMap::Instance().Get(scheme)]; - } + } const char* ParsedStateToString(const TState::EParsed& t) { switch (t) { @@ -79,7 +79,7 @@ namespace NUri { return "Parsed[Unknown]"; } } - + const char* FieldToString(const TField::EField& t) { switch (t) { case TField::FieldScheme: diff --git a/library/cpp/uri/common.h b/library/cpp/uri/common.h index 8025357763..de34fd897e 100644 --- a/library/cpp/uri/common.h +++ b/library/cpp/uri/common.h @@ -1,10 +1,10 @@ #pragma once -#include <util/stream/output.h> -#include <util/system/compat.h> +#include <util/stream/output.h> +#include <util/system/compat.h> #include <util/generic/strbuf.h> -namespace NUri { +namespace NUri { namespace NEncode { class TEncoder; class TEncodeMapperBase; @@ -14,13 +14,13 @@ namespace NUri { namespace NParse { class TRange; } - + class TParser; - + struct TField { #define FIELD_NAME(f) Field##f #define FIELD_FLAG(f) Flag##f = 1U << FIELD_NAME(f) - + enum EField { FIELD_NAME(Scheme), FIELD_NAME(User), @@ -30,24 +30,24 @@ namespace NUri { FIELD_NAME(Path), FIELD_NAME(Query), FIELD_NAME(Frag), - + // add fields above FieldUrlMAX, // reset count so actual field offsets are not interrupted FieldUrlLast = FieldUrlMAX - 1, // add extra fields below - + FIELD_NAME(HostAscii), - + // add extra fields above FieldAllMAX, // add aliases below - + FieldUsername = FieldUser, FieldPassword = FieldPass, FieldFragment = FieldFrag, }; - + enum EFlags { FIELD_FLAG(Scheme), FIELD_FLAG(User), @@ -70,11 +70,11 @@ namespace NUri { FlagAll = FlagUrlFields, // obsolete, for backwards compatibility FlagAllFields = FlagAllMAX - 1 }; - -#undef FIELD_NAME -#undef FIELD_FLAG + +#undef FIELD_NAME +#undef FIELD_FLAG }; - + struct TState { enum EParsed { ParsedOK = 0, @@ -88,7 +88,7 @@ namespace NUri { ParsedBadAuth, ParsedBadScheme, ParsedBadHost, - + // add before this line ParsedMAX }; @@ -121,17 +121,17 @@ namespace NUri { #define FEATURE_NAME(f) _BitFeature##f #define FEATURE_FLAG_NAME(f) Feature##f -#define FEATURE_FLAG(f) FEATURE_FLAG_NAME(f) = 1UL << FEATURE_NAME(f) +#define FEATURE_FLAG(f) FEATURE_FLAG_NAME(f) = 1UL << FEATURE_NAME(f) protected: enum EBit { //============================== // Cases interpreted as errors: //============================== - + // allows authorization user/password in URL FEATURE_NAME(AuthSupported), - + // allows all known schemes in URL FEATURE_NAME(SchemeKnown), @@ -140,7 +140,7 @@ namespace NUri { // allow opaque (RFC 2396) or rootless (RFC 3986) urls FEATURE_NAME(AllowRootless), - + //============================== // Cases interpreted for processing (if required): // (effects on result of Parse method) @@ -149,70 +149,70 @@ namespace NUri { // path needs normalization // (simplification of directory tree: /../, /./, etc. FEATURE_NAME(PathOperation), - + // don't force empty path to "/" FEATURE_NAME(AllowEmptyPath), - + // in scheme and host segments: // change upper case letters onto lower case ones FEATURE_NAME(ToLower), // decode unreserved symbols FEATURE_NAME(DecodeUnreserved), - + // legacy: decode standard symbols which may be safe for some fields FEATURE_NAME(DecodeStandardExtra), - + // decode symbols allowed (not necessarily safe to decode) only for a given field // (do not use directly, instead use FeatureDecodeSafe mask below) FEATURE_NAME(DecodeFieldAllowed), - + // handling of spaces FEATURE_NAME(EncodeSpace), - + // in query segment: change escaped space to '+' FEATURE_NAME(EncodeSpaceAsPlus), - + // escape all string 'markup' symbols FEATURE_NAME(EncodeForSQL), - + // encoding of extended ascii symbols (8-bit) FEATURE_NAME(EncodeExtendedASCII), - + // decoding of extended ascii symbols (8-bit) FEATURE_NAME(DecodeExtendedASCII), - + // encoding of extended delimiter set FEATURE_NAME(EncodeExtendedDelim), - + // decoding of extended delimiter set FEATURE_NAME(DecodeExtendedDelim), - + // control characters [0x00 .. 0x20) FEATURE_NAME(EncodeCntrl), - + // raw percent character FEATURE_NAME(EncodePercent), - + // hash fragments // https://developers.google.com/webmasters/ajax-crawling/docs/specification // move and encode #! fragments to the query FEATURE_NAME(HashBangToEscapedFragment), // move and decode _escaped_fragment_ to the fragment FEATURE_NAME(EscapedToHashBangFragment), - + // reject absolute paths started by "/../" FEATURE_NAME(PathDenyRootParent), - + // paths started by "/../" - ignore head FEATURE_NAME(PathStripRootParent), - + // tries to fix errors (in particular, in fragment) FEATURE_NAME(TryToFix), // check host for DNS compliance FEATURE_NAME(CheckHost), - + // allow IDN hosts // host is converted to punycode and stored in FieldHostAscii // @note host contains characters in the charset of the document @@ -221,47 +221,47 @@ namespace NUri { // percent-decoding cannot be converted from UTF-8 to UCS-4, // try to recode from the document charset (if not UTF-8) FEATURE_NAME(AllowHostIDN), - + // forces AllowHostIDN, but host is replaced with punycode // forces CheckHost since this replacement is irreversible FEATURE_NAME(ConvertHostIDN), - + // robot interpreted network paths as BadFormat urls FEATURE_NAME(DenyNetworkPath), // robot interprets URLs without a host as BadFormat FEATURE_NAME(RemoteOnly), - + /* non-RFC use case: - * 1. do not allow relative-path-only URIs when they can conflict with - * "host/path" (that is, only "./path" or "../path" are allowed); - * 2. if neither scheme nor userinfo are present but port is, it must - * be non-empty, to avoid conflict with "scheme:/..."; - * 3. if AllowRootless is not specified, rootless (or opaque) URIs are - * not recognized; - * 4. if AllowRootless is specified, disallow userinfo, preferring - * "scheme:pa@th" over "user:pass@host", and even "host:port" when - * host contains only scheme-legal characters. - */ + * 1. do not allow relative-path-only URIs when they can conflict with + * "host/path" (that is, only "./path" or "../path" are allowed); + * 2. if neither scheme nor userinfo are present but port is, it must + * be non-empty, to avoid conflict with "scheme:/..."; + * 3. if AllowRootless is not specified, rootless (or opaque) URIs are + * not recognized; + * 4. if AllowRootless is specified, disallow userinfo, preferring + * "scheme:pa@th" over "user:pass@host", and even "host:port" when + * host contains only scheme-legal characters. + */ FEATURE_NAME(NoRelPath), // standard prefers that all hex escapes were using uppercase A-F FEATURE_NAME(UpperEncoded), - + // internal usage: decode all encoded symbols FEATURE_NAME(DecodeANY), - + // add before this line _FeatureMAX }; - + protected: enum EPrivate : ui32 { FEATURE_FLAG(DecodeANY), FEATURE_FLAG(DecodeFieldAllowed), FEATURE_FLAG(DecodeStandardExtra), }; - + public: enum EPublic : ui32 { FeatureMAX = _FeatureMAX, @@ -297,27 +297,27 @@ namespace NUri { FEATURE_FLAG_NAME(HierURI) = FEATURE_FLAG_NAME(NoRelPath), FEATURE_FLAG(UpperEncoded), }; - -#undef FEATURE_NAME -#undef FEATURE_FLAG - + +#undef FEATURE_NAME +#undef FEATURE_FLAG + public: //============================== enum ESets { // these are guaranteed and will change buffer size - + FeatureDecodeStandard = 0 | FeatureDecodeUnreserved | FeatureDecodeStandardExtra, - + FeaturesDecodeExtended = 0 | FeatureDecodeExtendedASCII | FeatureDecodeExtendedDelim, - + FeaturesDecode = 0 | FeatureDecodeUnreserved | FeatureDecodeStandard | FeaturesDecodeExtended, - + FeaturesEncodeExtended = 0 | FeatureEncodeExtendedASCII | FeatureEncodeExtendedDelim, - + FeaturesEncode = 0 | FeatureEncodeForSQL | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent | FeaturesEncodeExtended, // these are not guaranteed to apply to a given field - + FeatureDecodeAllowed = 0 | FeatureDecodeUnreserved | FeatureDecodeFieldAllowed, FeaturesMaybeDecode = 0 | FeaturesDecode | FeatureDecodeAllowed, @@ -325,18 +325,18 @@ namespace NUri { FeaturesMaybeEncode = 0 | FeaturesEncode, FeaturesEncodeDecode = 0 | FeaturesMaybeEncode | FeaturesMaybeDecode, - + FeaturesAllEncoder = 0 | FeaturesEncodeDecode | FeatureDecodeANY | FeatureToLower | FeatureUpperEncoded | FeatureEncodeSpaceAsPlus, - + //============================== FeaturesNormalizeSet = 0 | FeaturePathOperation | FeatureToLower | FeatureDecodeAllowed | FeatureEncodeSpaceAsPlus | FeatureEncodeForSQL | FeaturePathStripRootParent | FeatureTryToFix | FeatureUpperEncoded, - + FeaturesDefault = 0 // it reproduces old parsedURL | FeaturePathOperation | FeaturePathDenyRootParent | FeatureCheckHost, // essentially allows all valid RFC urls and keeps them as-is FeaturesBare = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureAllowEmptyPath, - + FeaturesAll = 0 | FeatureAuthSupported | FeatureSchemeFlexible | FeatureCheckHost | FeaturesNormalizeSet, // Deprecated, use FeaturesRecommended @@ -347,14 +347,14 @@ namespace NUri { | FeatureDecodeUnreserved // 6.2.2.2 | FeaturePathOperation // 6.2.2.3 | FeaturePathDenyRootParent | FeatureSchemeKnown | FeatureConvertHostIDN | FeatureRemoteOnly | FeatureHashBangToEscapedFragment | FeatureCheckHost, - + // these are mutually exclusive FeaturesPath = 0 | FeaturePathDenyRootParent | FeaturePathStripRootParent, - + FeaturesEscapedFragment = 0 | FeatureEscapedToHashBangFragment | FeatureHashBangToEscapedFragment, - + FeaturesCheckSpecialChar = 0 | FeatureEncodeSpace | FeatureEncodeCntrl | FeatureEncodePercent, - + FeaturesEncodePChar = 0 | FeatureUpperEncoded | FeaturesEncodeDecode | FeaturesCheckSpecialChar, // http://wiki.yandex-team.ru/robot/newDesign/dups/normolization @@ -362,8 +362,8 @@ namespace NUri { FeaturesRobot = FeaturesRecommended }; - }; - + }; + static inline int strnicmp(const char* lt, const char* rt, size_t len) { return lt == rt ? 0 : ::strnicmp(lt, rt, len); } @@ -371,16 +371,16 @@ namespace NUri { static inline int CompareNoCasePrefix(const TStringBuf& lt, const TStringBuf& rt) { return strnicmp(lt.data(), rt.data(), rt.length()); } - + static inline bool EqualNoCase(const TStringBuf& lt, const TStringBuf& rt) { return lt.length() == rt.length() && 0 == CompareNoCasePrefix(lt, rt); - } - + } + static inline int CompareNoCase(const TStringBuf& lt, const TStringBuf& rt) { if (lt.length() == rt.length()) return CompareNoCasePrefix(lt, rt); return lt.length() < rt.length() ? -1 : 1; - } + } class TSchemeInfo { public: @@ -398,7 +398,7 @@ namespace NUri { bool Matches(const TStringBuf& scheme) const { return EqualNoCase(scheme, Str); } - + public: static const TSchemeInfo& Get(const TStringBuf& scheme); static const TSchemeInfo& Get(TScheme::EKind scheme) { @@ -488,24 +488,24 @@ namespace NUri { const char* ParsedStateToString(const TState::EParsed& t); const char* SchemeKindToString(const TScheme::EKind& t); -} - +} + Y_DECLARE_OUT_SPEC(inline, NUri::TField::EField, out, t) { - out << NUri::FieldToString(t); + out << NUri::FieldToString(t); } Y_DECLARE_OUT_SPEC(inline, NUri::TScheme::EKind, out, t) { - out << NUri::SchemeKindToString(t); + out << NUri::SchemeKindToString(t); } Y_DECLARE_OUT_SPEC(inline, NUri::TState::EParsed, out, t) { - out << NUri::ParsedStateToString(t); + out << NUri::ParsedStateToString(t); } static inline ui16 DefaultPort(NUri::TScheme::EKind scheme) { - return NUri::TSchemeInfo::GetDefaultPort(scheme); + return NUri::TSchemeInfo::GetDefaultPort(scheme); } static inline NUri::TScheme::EKind SchemeKind(const TStringBuf& scheme) { - return NUri::TSchemeInfo::GetKind(scheme); + return NUri::TSchemeInfo::GetKind(scheme); } diff --git a/library/cpp/uri/encode.cpp b/library/cpp/uri/encode.cpp index 584fb1bac9..2f63d8140a 100644 --- a/library/cpp/uri/encode.cpp +++ b/library/cpp/uri/encode.cpp @@ -1,33 +1,33 @@ -#include "encode.h" +#include "encode.h" #include <util/generic/singleton.h> -namespace NUri { +namespace NUri { namespace NEncode { -// http://tools.ietf.org/html/rfc3986#section-2.2 -#define GENDELIMS0 ":/?#[]@" -#define SUBDELIMS0 "!$&'()*+,;=" -// http://tools.ietf.org/html/rfc3986#section-2.3 -#define UNRESERVED "-._~" - -// now find subsets which can sometimes be decoded - -// remove '#' which can't ever be decoded -// don't mark anything allowed for pass (pass is completely encoded) -// safe in path, qry, frag -#define GENDELIMS1 ":@" -// allowed in qry, frag -#define GENDELIMS2 "/?" - -// qry-unsafe chars -#define SUBDELIMS1 "&+=;" -// rest allowed in qry, frag -#define SUBDELIMS2 "!$'()*," - +// http://tools.ietf.org/html/rfc3986#section-2.2 +#define GENDELIMS0 ":/?#[]@" +#define SUBDELIMS0 "!$&'()*+,;=" +// http://tools.ietf.org/html/rfc3986#section-2.3 +#define UNRESERVED "-._~" + +// now find subsets which can sometimes be decoded + +// remove '#' which can't ever be decoded +// don't mark anything allowed for pass (pass is completely encoded) +// safe in path, qry, frag +#define GENDELIMS1 ":@" +// allowed in qry, frag +#define GENDELIMS2 "/?" + +// qry-unsafe chars +#define SUBDELIMS1 "&+=;" +// rest allowed in qry, frag +#define SUBDELIMS2 "!$'()*," + const TEncoder::TGrammar& TEncoder::Grammar() { return *Singleton<TEncoder::TGrammar>(); } - + // initialize the grammar map TEncoder::TGrammar::TGrammar() { // first set up unreserved characters safe in any field @@ -36,37 +36,37 @@ namespace NUri { AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); AddRng('a', 'z', ECFLower, featUnres); Add(UNRESERVED, ECFUnres, featUnres); - + // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); - + // now field-specific subsets of reserved characters (gen-delims + sub-delims) const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; - + Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); - + Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); - + // control chars AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); Add(0x7f, TFeature::FeatureEncodeCntrl); - + // '%' starts a percent-encoded sequence Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); - + // extended ASCII AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); - + // extended delims Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); - + // add characters with other features Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); Add("'\"\\", TFeature::FeatureEncodeForSQL); - + GetMutable(':').EncodeFld |= TField::FlagUser; GetMutable('?').EncodeFld |= TField::FlagPath; GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; @@ -83,44 +83,44 @@ namespace NUri { return true; return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); } - + const int dD = 'a' - 'A'; - + int TEncodeMapper::EncodeSym(unsigned char& ch) const { const TCharFlags& chflags = TEncoder::GetFlags(ch); const ui64 flags = Flags & chflags.FeatFlags; - + if (flags & TFeature::FeatureToLower) ch += dD; - + if (Q_DecodeAny) return -1; - + if (flags & TFeature::FeaturesEncode) return 1; - + if (' ' == ch) { if (Q_EncodeSpcAsPlus) ch = '+'; return 0; } - + return 0; } - + int TEncodeMapper::EncodeHex(unsigned char& ch) const { const TCharFlags& chflags = TEncoder::GetFlags(ch); const ui64 flags = Flags & chflags.FeatFlags; - + if (flags & TFeature::FeatureToLower) ch += dD; - + if (Q_DecodeAny) return -1; - + if (chflags.IsDecode(FldMask, Flags)) return 0; - + if (' ' == ch) { if (!Q_EncodeSpcAsPlus) return 1; @@ -128,21 +128,21 @@ namespace NUri { return 0; } - return 1; + return 1; } - + bool TEncodeToMapper::Encode(unsigned char ch) const { if (Q_DecodeAny) return false; - + const TCharFlags& chflags = TEncoder::GetFlags(ch); if (FldMask & chflags.EncodeFld) return true; - + const ui64 flags = Flags & chflags.FeatFlags; return (flags & TFeature::FeaturesEncode); } - + TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) : Out(out) , FldSrc(fldsrc) @@ -151,12 +151,12 @@ namespace NUri { , HexValue(0) { } - + IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { static const char sHexCodes[] = "0123456789ABCDEF"; return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; } - + IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { for (size_t i = 0; i != val.length(); ++i) Encode(out, val[i]); @@ -173,7 +173,7 @@ namespace NUri { } return out; } - + IOutputStream& TEncoder::EncodeField( IOutputStream& out, const TStringBuf& val, TField::EField fld) { const ui32 fldmask = ui32(1) << fld; @@ -210,11 +210,11 @@ namespace NUri { Out << ch; return; } - + Out << '%'; if (escapepct) Out.Write("25", 2); // '%' Hex(Out, ch); } - } + } } diff --git a/library/cpp/uri/encode.h b/library/cpp/uri/encode.h index a9ece15427..6a817bf6fd 100644 --- a/library/cpp/uri/encode.h +++ b/library/cpp/uri/encode.h @@ -1,10 +1,10 @@ -#pragma once +#pragma once + +#include "common.h" + +#include <util/stream/output.h> -#include "common.h" - -#include <util/stream/output.h> - -namespace NUri { +namespace NUri { namespace NEncode { #define CHAR_TYPE_NAME(f) _ECT##f #define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f) @@ -16,7 +16,7 @@ namespace NUri { CHAR_TYPE_NAME(Unres), CHAR_TYPE_NAME(Stdrd), }; - + enum ECharFlag { CHAR_TYPE_FLAG(Digit), CHAR_TYPE_FLAG(Lower), @@ -30,8 +30,8 @@ namespace NUri { ECGStdrd = ECGUnres | ECFStdrd, }; -#undef CHAR_TYPE_NAME -#undef CHAR_TYPE_FLAG +#undef CHAR_TYPE_NAME +#undef CHAR_TYPE_FLAG struct TCharFlags { ui32 TypeFlags; @@ -65,7 +65,7 @@ namespace NUri { // should we decode an encoded character bool IsDecode(ui32 fldmask, ui64 flags) const; }; - + class TEncodeMapperBase { protected: TEncodeMapperBase() @@ -80,13 +80,13 @@ namespace NUri { , Q_DecodeAny(flags & TFeature::FeatureDecodeANY) { } - + protected: const ui64 Flags; const ui32 FldMask; const bool Q_DecodeAny; // this is a special option for username/password }; - + // maps a sym or hex character and indicates whether it has to be encoded class TEncodeMapper : public TEncodeMapperBase { @@ -99,11 +99,11 @@ namespace NUri { // negative=sym, positive=hex, zero=maybesym int EncodeSym(unsigned char&) const; int EncodeHex(unsigned char&) const; - + protected: const bool Q_EncodeSpcAsPlus; }; - + // indicates whether a character has to be encoded when copying to a field class TEncodeToMapper : public TEncodeMapperBase { @@ -121,16 +121,16 @@ namespace NUri { } bool Encode(unsigned char) const; }; - + class TEncoder { public: TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper()); - + ui64 ReEncode(const TStringBuf& url); ui64 ReEncode(const char* str, size_t len) { return ReEncode(TStringBuf(str, len)); } - + protected: static bool IsType(unsigned char c, ui64 flags) { return GetFlags(c).TypeFlags & flags; @@ -188,7 +188,7 @@ namespace NUri { static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld); static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags); - + static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) { return EncodeField(out, val, TField::FieldAllMAX); } @@ -200,20 +200,20 @@ namespace NUri { public: class TGrammar { TCharFlags Map_[256]; - + public: TGrammar(); const TCharFlags& Get(unsigned char ch) const { return Map_[ch]; } - + TCharFlags& GetMutable(unsigned char ch) { return Map_[ch]; } TCharFlags& Add(unsigned char ch, const TCharFlags& val) { return GetMutable(ch).Add(val); } - + void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) { for (unsigned i = lo; i <= hi; ++i) Add(i, val); @@ -221,7 +221,7 @@ namespace NUri { void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask)); } - + void Add(const TStringBuf& set, const TCharFlags& val) { for (size_t i = 0; i != set.length(); ++i) Add(set[i], val); @@ -230,9 +230,9 @@ namespace NUri { Add(set, TCharFlags(type, feat, decmask, encmask)); } }; - + static const TGrammar& Grammar(); - + protected: IOutputStream& Out; const TEncodeMapper FldSrc; @@ -276,7 +276,7 @@ namespace NUri { void Do(unsigned char, int); }; } - + using TEncoder = NEncode::TEncoder; } diff --git a/library/cpp/uri/encodefsm.rl6 b/library/cpp/uri/encodefsm.rl6 index 6a323aa85a..396fd40b36 100644 --- a/library/cpp/uri/encodefsm.rl6 +++ b/library/cpp/uri/encodefsm.rl6 @@ -4,21 +4,21 @@ #pragma clang diagnostic ignored "-Wunused-variable" #endif -namespace NUri { -namespace NEncode { - +namespace NUri { +namespace NEncode { + %%{ - machine TEncoder; + machine TEncoder; - hex = ( - digit >{ HexDigit(fc); } | - [A-F] >{ HexUpper(fc); } | - [a-f] >{ HexLower(fc); } - ); + hex = ( + digit >{ HexDigit(fc); } | + [A-F] >{ HexUpper(fc); } | + [a-f] >{ HexLower(fc); } + ); - escaped = ( "%" hex hex ) - > { HexReset(); } - % { DoHex(); }; + escaped = ( "%" hex hex ) + > { HexReset(); } + % { DoHex(); }; bad_escaped = ( "%" hex ) % { @@ -30,22 +30,22 @@ namespace NEncode { main := ( escaped | bad_escaped | sym )**; - write data; + write data; }%% -ui64 TEncoder::ReEncode(const TStringBuf &url) +ui64 TEncoder::ReEncode(const TStringBuf &url) { - const char *p = url.data(); - const char *pe = p + url.length(); - const char *eof = pe; + const char *p = url.data(); + const char *pe = p + url.length(); + const char *eof = pe; int cs; - OutFlags = 0; + OutFlags = 0; %% write init; %% write exec; - - return OutFlags; -} - -} + + return OutFlags; } + +} +} diff --git a/library/cpp/uri/http_url.h b/library/cpp/uri/http_url.h index 7c8e8d844d..70d53c7791 100644 --- a/library/cpp/uri/http_url.h +++ b/library/cpp/uri/http_url.h @@ -1,77 +1,77 @@ #pragma once -#include "uri.h" -#include "other.h" - -// XXX: use NUri::TUri directly; this whole file is for backwards compatibility +#include "uri.h" +#include "other.h" +// XXX: use NUri::TUri directly; this whole file is for backwards compatibility + class THttpURL : public NUri::TUri { -public: - typedef TField::EFlags TFlags; - typedef TField::EField TField; - typedef TScheme::EKind TSchemeKind; - typedef TState::EParsed TParsedState; +public: + typedef TField::EFlags TFlags; + typedef TField::EField TField; + typedef TScheme::EKind TSchemeKind; + typedef TState::EParsed TParsedState; -public: - enum { - FeatureUnescapeStandard = TFeature::FeatureDecodeStandard, +public: + enum { + FeatureUnescapeStandard = TFeature::FeatureDecodeStandard, FeatureEscSpace = TFeature::FeatureEncodeSpaceAsPlus, FeatureEscapeUnescaped = TFeature::FeatureEncodeExtendedASCII, FeatureNormalPath = TFeature::FeaturePathStripRootParent, }; -public: - THttpURL(unsigned defaultPort = 80) - : TUri(defaultPort) +public: + THttpURL(unsigned defaultPort = 80) + : TUri(defaultPort) { } - + THttpURL(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0) - : TUri(host, port, path, query, scheme, defaultPort) + : TUri(host, port, path, query, scheme, defaultPort) { } - THttpURL(const TUri& url) - : TUri(url) + THttpURL(const TUri& url) + : TUri(url) { } - -public: // XXX: don't use any of these legacy methods below -public: // use TUri::GetField() instead - /// will return null-terminated if fld is not dirty + +public: // XXX: don't use any of these legacy methods below +public: // use TUri::GetField() instead + /// will return null-terminated if fld is not dirty const char* Get(EField fld) const { - return GetField(fld).data(); - } - -public: // use TUriUpdate class so that Rewrite() is only called once + return GetField(fld).data(); + } + +public: // use TUriUpdate class so that Rewrite() is only called once void Set(EField field, const TStringBuf& value) { - if (SetInMemory(field, value)) - Rewrite(); - } - - template <size_t size> + if (SetInMemory(field, value)) + Rewrite(); + } + + template <size_t size> void Set(EField field, const char (&value)[size]) { - if (SetInMemory(field, value)) - Rewrite(); - } - -public: // use TUri::FldXXX methods for better control - // Partial quick set of the field, can be called for - // multiple fields + if (SetInMemory(field, value)) + Rewrite(); + } + +public: // use TUri::FldXXX methods for better control + // Partial quick set of the field, can be called for + // multiple fields bool SetInMemory(EField field, const TStringBuf& value) { - return FldMemSet(field, value); - } - - // clears a field + return FldMemSet(field, value); + } + + // clears a field void Reset(EField field) { - FldClr(field); - } + FldClr(field); + } }; - + static inline const char* HttpURLParsedStateToString(const NUri::TState::EParsed& t) { - return NUri::ParsedStateToString(t); -} + return NUri::ParsedStateToString(t); +} static inline const char* HttpUrlSchemeKindToString(const NUri::TScheme::EKind& t) { - return NUri::SchemeKindToString(t); -} + return NUri::SchemeKindToString(t); +} diff --git a/library/cpp/uri/other.cpp b/library/cpp/uri/other.cpp index b23a5b68a9..5ece2e6020 100644 --- a/library/cpp/uri/other.cpp +++ b/library/cpp/uri/other.cpp @@ -1,11 +1,11 @@ -#include "other.h" +#include "other.h" -#include <util/string/util.h> +#include <util/string/util.h> #include <util/system/yassert.h> /********************************************************/ /********************************************************/ - + static const Tr InvertTr(".:/?#", "\005\004\003\002\001"); static const Tr RevertTr("\005\004\003\002\001", ".:/?#"); diff --git a/library/cpp/uri/parse.cpp b/library/cpp/uri/parse.cpp index 1db4e008c4..49e087cc30 100644 --- a/library/cpp/uri/parse.cpp +++ b/library/cpp/uri/parse.cpp @@ -1,8 +1,8 @@ #include "parse.h" -#include "common.h" -#include "encode.h" +#include "common.h" +#include "encode.h" -namespace NUri { +namespace NUri { const TParseFlags TParser::FieldFlags[] = { TParseFlags(0 // FieldScheme @@ -23,27 +23,27 @@ namespace NUri { TParseFlags(0 // FieldHost | TFeature::FeatureToLower | TFeature::FeatureUpperEncoded | (TFeature::FeaturesMaybeEncode & ~TFeature::FeatureEncodeExtendedDelim), 0 | TFeature::FeaturesMaybeDecode) - + , TParseFlags(0 // FieldPort , 0) - + , TParseFlags(0 // FieldPath | TFeature::FeaturesEncodePChar | TFeature::FeaturePathOperation, 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus) - + , TParseFlags(0 // FieldQuery | TFeature::FeaturesEncodePChar | TFeature::FeatureEncodeSpaceAsPlus, 0 | TFeature::FeatureToLower) - + , TParseFlags(0 // FieldFragment | TFeature::FeaturesEncodePChar, 0 | TFeature::FeatureToLower | TFeature::FeatureEncodeSpaceAsPlus)}; - + namespace NParse { void TRange::AddRange(const TRange& range, ui64 mask) { FlagsAllPlaintext |= range.FlagsAllPlaintext; @@ -59,17 +59,17 @@ namespace NUri { } } - + void TParser::copyRequirementsImpl(const char* ptr) { Y_ASSERT(0 != CurRange.FlagsAllPlaintext); Y_UNUSED(ptr); -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(ptr, __FUNCTION__) - << " all=[" << IntToString<16>(CurRange.FlagsAllPlaintext) - << "] enc=[" << IntToString<16>(CurRange.FlagsEncodeMasked) + << " all=[" << IntToString<16>(CurRange.FlagsAllPlaintext) + << "] enc=[" << IntToString<16>(CurRange.FlagsEncodeMasked) << " & " << IntToString<16>(Flags.Allow | Flags.Extra) << "]"; PrintTail(CurRange.Beg, ptr); -#endif +#endif for (int i = 0; i < TField::FieldUrlMAX; ++i) { const TField::EField fld = TField::EField(i); TSection& section = Sections[fld]; @@ -92,22 +92,22 @@ namespace NUri { } void TParser::PctEndImpl(const char* ptr) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(PctBegin, __FUNCTION__); PrintTail(PctBegin, ptr); -#else +#else Y_UNUSED(ptr); -#endif +#endif setRequirement(PctBegin, TEncoder::GetFlags('%').FeatFlags); PctBegin = nullptr; } - + void TParser::HexSet(const char* ptr) { Y_ASSERT(nullptr != PctBegin); -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(ptr, __FUNCTION__); PrintTail(PctBegin, ptr + 1); -#endif +#endif PctBegin = nullptr; const unsigned char ch = HexValue; ui64 flags = TEncoder::GetFlags('%').FeatFlags | TEncoder::GetFlags(ch).FeatFlags; @@ -116,14 +116,14 @@ namespace NUri { } TState::EParsed TParser::ParseImpl() { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(UriStr.data(), "[Parsing]") << "URL"; PrintTail(UriStr); -#endif - +#endif + const bool ok = doParse(UriStr.data(), UriStr.length()); - -#ifdef DO_PRN + +#ifdef DO_PRN Cdbg << (ok ? "[Parsed]" : "[Failed]"); for (int idx = 0; idx < TField::FieldUrlMAX; ++idx) { const TSection& section = Sections[idx]; @@ -131,8 +131,8 @@ namespace NUri { Cdbg << ' ' << TField::EField(idx) << "=[" << section.Get() << ']'; } Cdbg << Endl; -#endif - +#endif + if (!ok) { if (!(Flags & TFeature::FeatureTryToFix) || !Sections[TField::FieldFrag].Beg) return TState::ParsedBadFormat; @@ -141,7 +141,7 @@ namespace NUri { } if ((Flags & TFeature::FeatureDenyNetworkPath) && IsNetPath()) - return TState::ParsedBadFormat; + return TState::ParsedBadFormat; const TSection& scheme = Sections[TField::FieldScheme]; Scheme = scheme.IsSet() ? TSchemeInfo::GetKind(scheme.Get()) : TScheme::SchemeEmpty; @@ -151,19 +151,19 @@ namespace NUri { // opaque case happens if (schemeInfo.FldReq & TField::FlagHost) return TState::ParsedBadFormat; - + if (TScheme::SchemeEmpty == Scheme) return TState::ParsedBadScheme; - + if (Flags & TFeature::FeatureAllowRootless) return TState::ParsedOK; if (!(Flags & TFeature::FeatureSchemeFlexible)) return TState::ParsedBadScheme; - + return TState::ParsedRootless; } - + checkSectionCollision(TField::FieldUser, TField::FieldHost); checkSectionCollision(TField::FieldPass, TField::FieldPort); @@ -199,9 +199,9 @@ namespace NUri { if ((schemeInfo.FldReq & TField::FlagHost) || (Flags & TFeature::FeatureRemoteOnly)) if (!host.IsSet() || 0 == host.Len()) return TState::ParsedBadFormat; - } - + } + return TState::ParsedOK; - } - + } + } diff --git a/library/cpp/uri/parse.h b/library/cpp/uri/parse.h index ca2358e572..0d2ce86ddf 100644 --- a/library/cpp/uri/parse.h +++ b/library/cpp/uri/parse.h @@ -1,20 +1,20 @@ -#pragma once +#pragma once -// #define DO_PRN +// #define DO_PRN -#include <cstddef> +#include <cstddef> -#include "common.h" +#include "common.h" #include <library/cpp/charset/doccodes.h> -#include <util/generic/strbuf.h> -#include <util/stream/output.h> -#include <util/string/cast.h> -#include <util/system/yassert.h> +#include <util/generic/strbuf.h> +#include <util/stream/output.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> -namespace NUri { +namespace NUri { class TParser; - + namespace NParse { class TRange { public: @@ -23,7 +23,7 @@ namespace NUri { ui64 FlagsAllPlaintext; ui32 Encode; ui32 Decode; - + public: TRange(const char* beg = nullptr) : Beg(beg) @@ -33,51 +33,51 @@ namespace NUri { , Decode(0) { } - + void Reset(const char* beg = nullptr) { *this = TRange(beg); } - + void AddRange(const TRange& range, ui64 mask); - + void AddFlag(const char* ptr, ui64 mask, ui64 flag) { if (0 != flag) AddFlagImpl(ptr, mask, flag, flag); } - + void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) { if (0 != flag) AddFlagImpl(ptr, mask, flag & ~exclflag, flag); } - + void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) { if (0 != flag) AddFlagImpl(ptr, mask, flag, flag, exclmask); } - + void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) { if (0 != flag) AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask); } - + private: void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) { AddFlagAllPlaintextImpl(ptr, plainflag); AddFlagEncodeMaskedImpl(encflag & mask); } - + void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) { AddFlagAllPlaintextImpl(ptr, plainflag); if (0 == (mask & exclmask)) AddFlagEncodeMaskedImpl(encflag & mask); } - + void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) { if (nullptr == Beg) Beg = ptr; FlagsAllPlaintext |= flag; } - + void AddFlagEncodeMaskedImpl(ui64 flag) { if (0 == flag) return; @@ -88,36 +88,36 @@ namespace NUri { ++Decode; } }; - - } - + + } + class TSection : protected NParse::TRange { private: friend class TParser; - + private: const char* End; - + TSection(const char* beg = nullptr) : NParse::TRange(beg) , End(nullptr) { } - + void Reset() { Enter(nullptr); } - + void Reset(const char* pc) { Y_ASSERT(!Beg || !pc || Beg < pc); Reset(); } - + void Enter(const char* pc) { *this = TSection(pc); } - + bool Leave(const char* pc) { Y_ASSERT(Beg); End = pc; @@ -133,40 +133,40 @@ namespace NUri { bool IsSet() const { return End; } - + TStringBuf Get() const { return TStringBuf(Beg, End); } - + size_t Len() const { return End - Beg; } - + size_t DecodedLen() const { return Len() - 2 * Decode; } - + size_t EncodedLen() const { return 2 * Encode + DecodedLen(); } - + ui32 GetEncode() const { return Encode; } - + ui32 GetDecode() const { return Decode; } - + ui64 GetFlagsEncode() const { return FlagsEncodeMasked; } - + ui64 GetFlagsAllPlaintext() const { return FlagsAllPlaintext; } }; - + class TParser { public: TSection Sections[TField::FieldUrlMAX]; @@ -175,7 +175,7 @@ namespace NUri { const TStringBuf UriStr; TState::EParsed State; ECharset Enc; - + public: TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8) : Scheme(TScheme::SchemeEmpty) @@ -218,97 +218,97 @@ namespace NUri { ui64 GetFieldFlags(TField::EField fld) const { return GetFieldFlags(fld, Flags); } - + protected: static const TParseFlags FieldFlags[TField::FieldUrlMAX]; TSection::TRange CurRange; unsigned HexValue; const char* PctBegin; - -#ifdef DO_PRN + +#ifdef DO_PRN IOutputStream& PrintAddr(const char* ptr) const { return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] "; } - + IOutputStream& PrintHead(const char* ptr, const char* func) const { return PrintAddr(ptr) << func << " "; } - + IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const { return PrintHead(ptr, func) << fld; } - + IOutputStream& PrintTail(const TStringBuf& val) const { return Cdbg << " [" << val << "]" << Endl; } IOutputStream& PrintTail(const char* beg, const char* end) const { return PrintTail(TStringBuf(beg, end)); } -#endif - +#endif + void ResetSection(TField::EField fld, const char* pc = nullptr) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); -#endif +#endif Sections[fld].Reset(pc); } void storeSection(const TStringBuf& val, TField::EField fld) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(val.data(), __FUNCTION__, fld); PrintTail(val); -#endif +#endif Sections[fld].Set(val); } - + void startSection(const char* pc, TField::EField fld) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); -#endif +#endif copyRequirements(pc); Sections[fld].Enter(pc); } void finishSection(const char* pc, TField::EField fld) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); -#endif +#endif if (Sections[fld].Leave(pc)) copyRequirements(pc); } void setRequirement(const char* ptr, ui64 flags) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); -#endif +#endif CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags); } - + void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & exclflag=" << IntToString<16>(exclflag) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); -#endif +#endif CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag); } - + void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) { -#ifdef DO_PRN +#ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & exclmask=" << IntToString<16>(exclmask) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); -#endif +#endif CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask); } - + void copyRequirementsImpl(const char* ptr); void copyRequirements(const char* ptr) { PctEnd(ptr); @@ -347,7 +347,7 @@ namespace NUri { HexReset(); PctBegin = ptr; } - + void checkSectionCollision(TField::EField fld1, TField::EField fld2) { if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) { Sections[fld1].Reset(); @@ -357,5 +357,5 @@ namespace NUri { bool doParse(const char* str_beg, size_t length); TState::EParsed ParseImpl(); }; - + } diff --git a/library/cpp/uri/parsefsm.rl6 b/library/cpp/uri/parsefsm.rl6 index 7097723650..62c05c51d0 100644 --- a/library/cpp/uri/parsefsm.rl6 +++ b/library/cpp/uri/parsefsm.rl6 @@ -5,10 +5,10 @@ #endif %%{ - machine TParser; + machine TParser; - #================================================ - # RFC 3986 http://tools.ietf.org/html/rfc3986 + #================================================ + # RFC 3986 http://tools.ietf.org/html/rfc3986 # with some modifications #================================================ # The RegEx @@ -27,475 +27,475 @@ # $8 = #Related # $9 = Related # - # So $2:scheme $4:authority $5:path $7:query $9:fragment - #================================================ - - + # So $2:scheme $4:authority $5:path $7:query $9:fragment #================================================ - # List of all ASCII characters and where they can be used - #================================================ - - # 0-31 x00-1F cntrl ext_cntrl - # 32 x20 space ext_space - # 33 x21 ! sub_delims - # 34 x22 " ext_delims - # 35 x23 # gen_delims / f=frag - # 36 x24 $ sub_delims - # 37 x25 % PCT - # 38 x26 & sub_delims - # 39 x27 ' sub_delims - # 40 x28 ( sub_delims - # 41 x29 ) sub_delims - # 42 x2A * sub_delims - # 43 x2B + sub_delims - # 44 x2C , sub_delims - # 45 x2D - unreserved - # 46 x2E . unreserved - # 47 x2F / gen_delims / f=path,qry,frag - # 48-57 x30-39 0-9 unreserved - # 58 x3A : gen_delims / f=pass,path,qry,frag - # 59 x3B ; sub_delims - # 60 x3C < ext_delims - # 61 x3D = sub_delims - # 62 x3E > ext_delims - # 63 x3F ? gen_delims / f=qry,frag - # 64 x40 @ gen_delims / f=path,qry,frag - # 65-90 x41-5A A-Z unreserved - # 91 x5B [ gen_delims / ext_delims - # 92 x5C \ ext_delims - # 93 x5D ] gen_delims / ext_delims - # 94 x5E ^ ext_delims - # 95 x5F _ unreserved - # 96 x60 ` ext_delims - # 97-122 x61-7A a-z unreserved - # 123 x7B { ext_delims - # 124 x7C | ext_delims - # 125 x7D } ext_delims - # 126 x7E ~ unreserved - # 127 x7F DEL ext_cntrl - # 128-255 x80-FF ext_ascii - - - #================================================ - # Actions used in multiple definitions - #================================================ - - action act_req_enc_sql { REQ(fpc, FeatureEncodeForSQL) } - - # REQ must apply to a char in range but not after the range has been reset - action act_req_pathop { REQ(fpc - 1, FeaturePathOperation) } - - action act_clr_scheme { CLR(fpc, Scheme) } - action act_clr_user { CLR(fpc, User) } - action act_clr_host { CLR(fpc, Host) } - action act_beg_host { BEG(fpc, Host) } - action act_end_host { END(fpc, Host) } - action act_beg_path { BEG(fpc, Path) } - action act_end_path { END(fpc, Path) } + #================================================ + # List of all ASCII characters and where they can be used + #================================================ + + # 0-31 x00-1F cntrl ext_cntrl + # 32 x20 space ext_space + # 33 x21 ! sub_delims + # 34 x22 " ext_delims + # 35 x23 # gen_delims / f=frag + # 36 x24 $ sub_delims + # 37 x25 % PCT + # 38 x26 & sub_delims + # 39 x27 ' sub_delims + # 40 x28 ( sub_delims + # 41 x29 ) sub_delims + # 42 x2A * sub_delims + # 43 x2B + sub_delims + # 44 x2C , sub_delims + # 45 x2D - unreserved + # 46 x2E . unreserved + # 47 x2F / gen_delims / f=path,qry,frag + # 48-57 x30-39 0-9 unreserved + # 58 x3A : gen_delims / f=pass,path,qry,frag + # 59 x3B ; sub_delims + # 60 x3C < ext_delims + # 61 x3D = sub_delims + # 62 x3E > ext_delims + # 63 x3F ? gen_delims / f=qry,frag + # 64 x40 @ gen_delims / f=path,qry,frag + # 65-90 x41-5A A-Z unreserved + # 91 x5B [ gen_delims / ext_delims + # 92 x5C \ ext_delims + # 93 x5D ] gen_delims / ext_delims + # 94 x5E ^ ext_delims + # 95 x5F _ unreserved + # 96 x60 ` ext_delims + # 97-122 x61-7A a-z unreserved + # 123 x7B { ext_delims + # 124 x7C | ext_delims + # 125 x7D } ext_delims + # 126 x7E ~ unreserved + # 127 x7F DEL ext_cntrl + # 128-255 x80-FF ext_ascii + + + #================================================ + # Actions used in multiple definitions + #================================================ + + action act_req_enc_sql { REQ(fpc, FeatureEncodeForSQL) } + + # REQ must apply to a char in range but not after the range has been reset + action act_req_pathop { REQ(fpc - 1, FeaturePathOperation) } + + action act_clr_scheme { CLR(fpc, Scheme) } + action act_clr_user { CLR(fpc, User) } + action act_clr_host { CLR(fpc, Host) } + action act_beg_host { BEG(fpc, Host) } + action act_end_host { END(fpc, Host) } + action act_beg_path { BEG(fpc, Path) } + action act_end_path { END(fpc, Path) } + + #================================================ - # RFC 3986 ABNFs - #================================================ - - DIGIT = digit; - - ALPHA = ( upper >{ REQ(fpc, FeatureToLower) } ) | - lower; - - ALNUM = ALPHA | DIGIT; - - PCT = "%" >{ PctBeg(fpc); } ; - - HEXDIG = ( - DIGIT >{ HexDigit(fpc, fc); } - | [A-F] >{ HexUpper(fpc, fc); } - | [a-f] >{ HexLower(fpc, fc); } - ); - - # HexSet sets REQ so must apply in range - HEXNUM = ( HEXDIG HEXDIG ) %{ HexSet(fpc - 1); }; - - pct_encoded = PCT HEXNUM; - - unreserved = ALNUM | "-" | "." | "_" | "~"; - - gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@"; - - sub_delims = "!" | "$" | "&" | "(" | ")" - | "*" | "+" | "," | ";" | "=" - | ( ['] >act_req_enc_sql ); - - - #================================================ - # Local ABNFs - #================================================ - - VALID = ^(cntrl | space) | " "; - - # safe character sequences - safe = unreserved | pct_encoded | sub_delims; - - # MOD: Yandex extensions - - ext_ascii = (VALID - ascii) >{ REQ(fpc, FeatureEncodeExtendedASCII) }; - ext_delims = ( "[" | "]" | "|" | "{" | "}" | "`" | "^" | "<" | ">" - | ( ["\\] >act_req_enc_sql ) + # RFC 3986 ABNFs + #================================================ + + DIGIT = digit; + + ALPHA = ( upper >{ REQ(fpc, FeatureToLower) } ) | + lower; + + ALNUM = ALPHA | DIGIT; + + PCT = "%" >{ PctBeg(fpc); } ; + + HEXDIG = ( + DIGIT >{ HexDigit(fpc, fc); } + | [A-F] >{ HexUpper(fpc, fc); } + | [a-f] >{ HexLower(fpc, fc); } + ); + + # HexSet sets REQ so must apply in range + HEXNUM = ( HEXDIG HEXDIG ) %{ HexSet(fpc - 1); }; + + pct_encoded = PCT HEXNUM; + + unreserved = ALNUM | "-" | "." | "_" | "~"; + + gen_delims = ":" | "/" | "?" | "#" | "[" | "]" | "@"; + + sub_delims = "!" | "$" | "&" | "(" | ")" + | "*" | "+" | "," | ";" | "=" + | ( ['] >act_req_enc_sql ); + + + #================================================ + # Local ABNFs + #================================================ + + VALID = ^(cntrl | space) | " "; + + # safe character sequences + safe = unreserved | pct_encoded | sub_delims; + + # MOD: Yandex extensions + + ext_ascii = (VALID - ascii) >{ REQ(fpc, FeatureEncodeExtendedASCII) }; + ext_delims = ( "[" | "]" | "|" | "{" | "}" | "`" | "^" | "<" | ">" + | ( ["\\] >act_req_enc_sql ) ) >{ REQ(fpc, FeatureEncodeExtendedDelim) }; # " fix hilite - ext_space = " " >{ REQ(fpc, FeatureEncodeSpace) }; - ext_cntrl = cntrl >{ REQ(fpc, FeatureEncodeCntrl) }; + ext_space = " " >{ REQ(fpc, FeatureEncodeSpace) }; + ext_cntrl = cntrl >{ REQ(fpc, FeatureEncodeCntrl) }; pct_maybe_encoded = PCT (HEXDIG | HEXNUM)? ; - ext_safe = unreserved - | pct_maybe_encoded - | sub_delims - | ext_delims - | ext_space - | ext_cntrl - | ext_ascii; - - # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - # uric (RFC 2396) - # MOD: extension to format, add extended delimiters and 8-bit ascii - - pchar_nc = ext_safe | "@"; - pchar = pchar_nc | ":"; - path_sep = "/"; - uric = pchar | path_sep | "?"; - - - #================================================ - # Fields - #================================================ - # Single fields use fXXX as machine definitions - - - #================================================ - # Scheme - # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - #================================================ - - scheme = ( ALPHA ( ALPHA | DIGIT | "+" | "-" | "." )** ); - fscheme = scheme >{ BEG(fpc, Scheme) } %{ END(fpc, Scheme) }; - - - #================================================ - # UserInfo - # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) - #================================================ - - # MOD: split into a pair of sections: username and password - - fuser = ( ext_safe )** >{ BEG(fpc, User) } %{ END(fpc, User) }; - fpass = ( ext_safe | ":" )** >{ BEG(fpc, Pass) } %{ END(fpc, Pass) }; - userinfo = ( fuser ( ":" fpass )? ) ( "@" %act_clr_host @^act_clr_user ); - - - #================================================ - # Hostname - # host = IP-literal / IPv4address / reg-name - #================================================ - - # MOD: simplify IP-literal for now - IPv6address = (HEXDIG | ":" | ".")+; - IP_literal = "[" IPv6address "]"; - - # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet - # MOD: simplify dec-octet which originally matches only 0-255 - - dec_octet = DIGIT+; - IPv4address = dec_octet "." dec_octet "." dec_octet "." dec_octet; - + ext_safe = unreserved + | pct_maybe_encoded + | sub_delims + | ext_delims + | ext_space + | ext_cntrl + | ext_ascii; + + # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + # uric (RFC 2396) + # MOD: extension to format, add extended delimiters and 8-bit ascii + + pchar_nc = ext_safe | "@"; + pchar = pchar_nc | ":"; + path_sep = "/"; + uric = pchar | path_sep | "?"; + + + #================================================ + # Fields + #================================================ + # Single fields use fXXX as machine definitions + + + #================================================ + # Scheme + # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + #================================================ + + scheme = ( ALPHA ( ALPHA | DIGIT | "+" | "-" | "." )** ); + fscheme = scheme >{ BEG(fpc, Scheme) } %{ END(fpc, Scheme) }; + + + #================================================ + # UserInfo + # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + #================================================ + + # MOD: split into a pair of sections: username and password + + fuser = ( ext_safe )** >{ BEG(fpc, User) } %{ END(fpc, User) }; + fpass = ( ext_safe | ":" )** >{ BEG(fpc, Pass) } %{ END(fpc, Pass) }; + userinfo = ( fuser ( ":" fpass )? ) ( "@" %act_clr_host @^act_clr_user ); + + + #================================================ + # Hostname + # host = IP-literal / IPv4address / reg-name + #================================================ + + # MOD: simplify IP-literal for now + IPv6address = (HEXDIG | ":" | ".")+; + IP_literal = "[" IPv6address "]"; + + # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + # MOD: simplify dec-octet which originally matches only 0-255 + + dec_octet = DIGIT+; + IPv4address = dec_octet "." dec_octet "." dec_octet "." dec_octet; + # MOD: non-empty; will use host? - # reg-name = *( unreserved / pct-encoded / sub-delims ) + # reg-name = *( unreserved / pct-encoded / sub-delims ) ### todo: allow ':' (need to fix grammar to disambiguate port) achar = any - (0x00 .. 0x20) - '/' - '#' - '?' - ':' - '%'; upperhalf = any - (0x00 .. 0x7F); hostname = (((achar | pct_encoded)+) & (any* (alnum | upperhalf) any*)); reg_name = hostname - IPv4address - IP_literal; - - # uses first-match-wins approach - host = IP_literal | IPv4address | (reg_name - IPv4address); - fhost = host? >act_beg_host %act_end_host; - fhost_nempty = host >act_beg_host %act_end_host; - - - #================================================ - # Port - # port = *DIGIT - #================================================ - - # MOD: use fport? for empty - fport = DIGIT+ >{ BEG(fpc, Port) } %{ END(fpc, Port) }; + + # uses first-match-wins approach + host = IP_literal | IPv4address | (reg_name - IPv4address); + fhost = host? >act_beg_host %act_end_host; + fhost_nempty = host >act_beg_host %act_end_host; - #================================================ - # Authority - # authority = [ userinfo "@" ] host [ ":" port ] - #================================================ + #================================================ + # Port + # port = *DIGIT + #================================================ - authority = userinfo? fhost ( ":" fport? )? ; + # MOD: use fport? for empty + fport = DIGIT+ >{ BEG(fpc, Port) } %{ END(fpc, Port) }; #================================================ - # Path - #================================================ - # path = path-abempty ; begins with "/" or is empty - # / path-absolute ; begins with "/" but not "//" - # / path-noscheme ; begins with a non-colon segment - # / path-rootless ; begins with a segment - # / path-empty ; zero characters - #================================================ - + # Authority + # authority = [ userinfo "@" ] host [ ":" port ] + #================================================ + + authority = userinfo? fhost ( ":" fport? )? ; + + + #================================================ + # Path + #================================================ + # path = path-abempty ; begins with "/" or is empty + # / path-absolute ; begins with "/" but not "//" + # / path-noscheme ; begins with a non-colon segment + # / path-rootless ; begins with a segment + # / path-empty ; zero characters + #================================================ + # checkPath rules - checkPathHead = - "." ( "."? path_sep VALID* )? %act_req_pathop ; - - checkPathTail = - VALID* - ( path_sep "."{1,2} ) %act_req_pathop ; - - checkPathMid = VALID* - ( path_sep "."{,2} path_sep ) %act_req_pathop - VALID*; - - checkAbsPath = checkPathMid | checkPathTail | VALID*; - checkRelPath = checkPathHead | checkAbsPath; - - # segment = *pchar - segment = pchar**; - - # segment-nz = 1*pchar - segment_nz = pchar+; - - # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) - segment_nz_nc = pchar_nc+; - - sep_segment = path_sep segment; - - # non-standard definitions - - fpath_abnempty = - ( - ( sep_segment+ ) - & checkAbsPath - ) - >act_beg_path %act_end_path - ; - - fpath_relative = - ( - "." - ( "."? sep_segment+ )? - ) - >act_beg_path %act_req_pathop %act_end_path - ; - - # standard definitions - - # do not save empty paths, they behave differently in relative resolutions - fpath_empty = zlen; - - fpath_abempty = fpath_abnempty?; - - fpath_absolute = - ( - ( path_sep ( segment_nz sep_segment* )? ) - & checkAbsPath - ) - >act_beg_path %act_end_path - ; - - fpath_noscheme = - ( - ( segment_nz_nc sep_segment* ) - & checkRelPath - ) - >act_beg_path %act_end_path - ; - - fpath_rootless = - ( - ( segment_nz sep_segment* ) - ) - >act_beg_path %act_end_path - ; - - #================================================ - # Query and fragment - # query = *( pchar / "/" / "?" ) - # fragment = *( pchar / "/" / "?" ) - #================================================ - - # MOD: fragment allows '#' characters - - fquery = (uric )** >{ BEG(fpc, Query) } %{ END(fpc, Query) }; - ffrag = (uric | "#")** >{ BEG(fpc, Frag) } %{ END(fpc, Frag) }; - query_frag = ("?" fquery)? ("#" ffrag)? ; + checkPathHead = + "." ( "."? path_sep VALID* )? %act_req_pathop ; + + checkPathTail = + VALID* + ( path_sep "."{1,2} ) %act_req_pathop ; + + checkPathMid = VALID* + ( path_sep "."{,2} path_sep ) %act_req_pathop + VALID*; + + checkAbsPath = checkPathMid | checkPathTail | VALID*; + checkRelPath = checkPathHead | checkAbsPath; + + # segment = *pchar + segment = pchar**; + + # segment-nz = 1*pchar + segment_nz = pchar+; + + # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + segment_nz_nc = pchar_nc+; + + sep_segment = path_sep segment; + + # non-standard definitions + + fpath_abnempty = + ( + ( sep_segment+ ) + & checkAbsPath + ) + >act_beg_path %act_end_path + ; + + fpath_relative = + ( + "." + ( "."? sep_segment+ )? + ) + >act_beg_path %act_req_pathop %act_end_path + ; + + # standard definitions + + # do not save empty paths, they behave differently in relative resolutions + fpath_empty = zlen; + + fpath_abempty = fpath_abnempty?; + + fpath_absolute = + ( + ( path_sep ( segment_nz sep_segment* )? ) + & checkAbsPath + ) + >act_beg_path %act_end_path + ; + + fpath_noscheme = + ( + ( segment_nz_nc sep_segment* ) + & checkRelPath + ) + >act_beg_path %act_end_path + ; + + fpath_rootless = + ( + ( segment_nz sep_segment* ) + ) + >act_beg_path %act_end_path + ; + + #================================================ + # Query and fragment + # query = *( pchar / "/" / "?" ) + # fragment = *( pchar / "/" / "?" ) + #================================================ + + # MOD: fragment allows '#' characters + + fquery = (uric )** >{ BEG(fpc, Query) } %{ END(fpc, Query) }; + ffrag = (uric | "#")** >{ BEG(fpc, Frag) } %{ END(fpc, Frag) }; + query_frag = ("?" fquery)? ("#" ffrag)? ; #================================================ - # final ABNFs - # URI-reference = URI / relative-ref + # final ABNFs + # URI-reference = URI / relative-ref #================================================ - # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - # hier-part = "//" authority path-abempty - # / path-absolute - # / path-rootless - # / path-empty - # relative-ref = relative-part [ "?" query ] [ "#" fragment ] - # relative-part = "//" authority path-abempty - # / path-absolute - # / path-noscheme - # / path-empty - - net_path = "//" authority fpath_abempty; - - URI = - fscheme ":" - ( - net_path - | fpath_absolute - | fpath_rootless - | fpath_empty - ) - $^act_clr_scheme - query_frag - ; - - relative_ref = - ( - net_path - | fpath_absolute - | fpath_noscheme - | fpath_empty - ) - %act_clr_scheme - query_frag - ; - - # non-standard definitions - - URI_no_rootless = - fscheme ":" - ( - net_path - | fpath_absolute - | fpath_empty - ) - $^act_clr_scheme - query_frag - ; - - host_path = - ( - fhost_nempty fpath_abempty - | (fhost_nempty - scheme) ":" fport fpath_abempty - ) - @^act_clr_host - ; - - # no userinfo, path absolute, empty or clearly relative, starting with "./" | "../" - relative_ref_host_pabem = - ( - net_path - | host_path - | fpath_absolute - | fpath_relative - | fpath_empty - ) - %act_clr_scheme - query_frag - ; - - # port must be non-empty, to avoid clash with "scheme:/..." - auth_path = - ( - fhost_nempty ( ":" fport )? fpath_abempty - | userinfo fhost ( ":" fport? )? fpath_abempty - ) - @^act_clr_host - @^act_clr_user - ; - - # userinfo, path absolute, empty or clearly relative, starting with "./" | "../" - relative_ref_auth_pabem = - ( - net_path - | auth_path - | fpath_absolute - | fpath_relative - | fpath_empty - ) - %act_clr_scheme - query_frag - ; - - - # machine instantiations - - URI_ref_no_rootless := - ( - URI_no_rootless - # scheme://user@host preferred over user://pass@host/path - | relative_ref_auth_pabem - ) - ; - - URI_ref_no_relpath := - ( - relative_ref_host_pabem - # host:port/path preferred over scheme:path/rootless - | (URI - relative_ref_host_pabem) - ) - ; - - URI_ref := - ( - relative_ref - | URI - ) - ; - + # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + # hier-part = "//" authority path-abempty + # / path-absolute + # / path-rootless + # / path-empty + # relative-ref = relative-part [ "?" query ] [ "#" fragment ] + # relative-part = "//" authority path-abempty + # / path-absolute + # / path-noscheme + # / path-empty + + net_path = "//" authority fpath_abempty; + + URI = + fscheme ":" + ( + net_path + | fpath_absolute + | fpath_rootless + | fpath_empty + ) + $^act_clr_scheme + query_frag + ; + + relative_ref = + ( + net_path + | fpath_absolute + | fpath_noscheme + | fpath_empty + ) + %act_clr_scheme + query_frag + ; + + # non-standard definitions + + URI_no_rootless = + fscheme ":" + ( + net_path + | fpath_absolute + | fpath_empty + ) + $^act_clr_scheme + query_frag + ; + + host_path = + ( + fhost_nempty fpath_abempty + | (fhost_nempty - scheme) ":" fport fpath_abempty + ) + @^act_clr_host + ; + + # no userinfo, path absolute, empty or clearly relative, starting with "./" | "../" + relative_ref_host_pabem = + ( + net_path + | host_path + | fpath_absolute + | fpath_relative + | fpath_empty + ) + %act_clr_scheme + query_frag + ; + + # port must be non-empty, to avoid clash with "scheme:/..." + auth_path = + ( + fhost_nempty ( ":" fport )? fpath_abempty + | userinfo fhost ( ":" fport? )? fpath_abempty + ) + @^act_clr_host + @^act_clr_user + ; + + # userinfo, path absolute, empty or clearly relative, starting with "./" | "../" + relative_ref_auth_pabem = + ( + net_path + | auth_path + | fpath_absolute + | fpath_relative + | fpath_empty + ) + %act_clr_scheme + query_frag + ; + + + # machine instantiations + + URI_ref_no_rootless := + ( + URI_no_rootless + # scheme://user@host preferred over user://pass@host/path + | relative_ref_auth_pabem + ) + ; + + URI_ref_no_relpath := + ( + relative_ref_host_pabem + # host:port/path preferred over scheme:path/rootless + | (URI - relative_ref_host_pabem) + ) + ; + + URI_ref := + ( + relative_ref + | URI + ) + ; + write data; }%% -namespace NUri { - -bool TParser::doParse(const char* str_beg, size_t length) +namespace NUri { + +bool TParser::doParse(const char* str_beg, size_t length) { const char* p = str_beg; - const char* pe = str_beg + length; - const char* eof = pe; + const char* pe = str_beg + length; + const char* eof = pe; int cs; -#define BEG(ptr, fld) startSection (ptr, TField::Field ## fld); -#define END(ptr, fld) finishSection(ptr, TField::Field ## fld); -#define SET(val, fld) storeSection(val, TField::Field ## fld); -#define CLR(ptr, fld) ResetSection (TField::Field ## fld, ptr); -#define REQ(ptr, req) setRequirement(ptr, TFeature :: req); - - %% write init nocs; - +#define BEG(ptr, fld) startSection (ptr, TField::Field ## fld); +#define END(ptr, fld) finishSection(ptr, TField::Field ## fld); +#define SET(val, fld) storeSection(val, TField::Field ## fld); +#define CLR(ptr, fld) ResetSection (TField::Field ## fld, ptr); +#define REQ(ptr, req) setRequirement(ptr, TFeature :: req); + + %% write init nocs; + if (0 == (Flags & TFeature::FeatureNoRelPath)) { - cs = TParser_en_URI_ref; + cs = TParser_en_URI_ref; } else if (0 == (Flags & TFeature::FeatureAllowRootless)) { - cs = TParser_en_URI_ref_no_rootless; + cs = TParser_en_URI_ref_no_rootless; } else { - cs = TParser_en_URI_ref_no_relpath; + cs = TParser_en_URI_ref_no_relpath; } - + %% write exec; -#undef BEG -#undef END -#undef SET -#undef CLR -#undef REQ - - return cs >= TParser_first_final; -} - +#undef BEG +#undef END +#undef SET +#undef CLR +#undef REQ + + return cs >= TParser_first_final; } + +} diff --git a/library/cpp/uri/uri-ru_ut.cpp b/library/cpp/uri/uri-ru_ut.cpp index ec35a164d2..3ce98df5f4 100644 --- a/library/cpp/uri/uri-ru_ut.cpp +++ b/library/cpp/uri/uri-ru_ut.cpp @@ -1,9 +1,9 @@ -#include "uri_ut.h" +#include "uri_ut.h" #include <library/cpp/charset/recyr.hh> #include <library/cpp/html/entity/htmlentity.h> #include <util/system/maxlen.h> -namespace NUri { +namespace NUri { namespace { TString AsWin1251(const TString& s) { return Recode(CODES_UTF8, CODES_WIN, s); @@ -36,10 +36,10 @@ namespace NUri { UNIT_ASSERT(url.IsNull(TField::FlagHost)); UNIT_ASSERT(!url.IsNull(TField::FlagPath)); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagPath), "www.ya.ru/index.html"); - + UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10")), TState::ParsedOK); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10")); - + UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"), TFeature::FeaturesDefault | TFeature::FeatureEncodeExtendedASCII), TState::ParsedOK); @@ -131,9 +131,9 @@ namespace NUri { Y_UNIT_TEST(testRuIDNA) { { #define DEC "\xD7\xE5\xF0\xE5\xEf\xEE\xE2\xE5\xF6.\xF0\xF4" /* "Череповец.рф" in Windows-1251 */ -#define ENC "%D7%E5%F0%E5%EF%EE%E2%E5%F6.%F0%F4" -// punycode corresponds to lowercase -#define PNC "xn--b1afab7bff7cb.xn--p1ai" +#define ENC "%D7%E5%F0%E5%EF%EE%E2%E5%F6.%F0%F4" +// punycode corresponds to lowercase +#define PNC "xn--b1afab7bff7cb.xn--p1ai" TTest test = { "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; TUri url; @@ -141,9 +141,9 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); -#undef PNC -#undef DEC -#undef ENC +#undef PNC +#undef DEC +#undef ENC } } @@ -158,6 +158,6 @@ namespace NUri { // Should be properly null-terminated UNIT_ASSERT_VALUES_EQUAL(host.size(), strlen(host.data())); } - } - + } + } diff --git a/library/cpp/uri/uri.cpp b/library/cpp/uri/uri.cpp index 56a9a4e5ef..d2df1ccea8 100644 --- a/library/cpp/uri/uri.cpp +++ b/library/cpp/uri/uri.cpp @@ -1,32 +1,32 @@ -#include "uri.h" -#include "parse.h" +#include "uri.h" +#include "parse.h" -#include <util/string/cast.h> -#include <util/string/util.h> +#include <util/string/cast.h> +#include <util/string/util.h> #include <util/system/yassert.h> -namespace NUri { +namespace NUri { TState::EParsed TUri::CheckHost(const TStringBuf& host) { if (host.empty()) return ParsedOK; - + unsigned domainLevel = 0; unsigned domainLevelOfUnderscore = 0; - + bool isAlnum = false; bool startLabel = true; for (size_t i = 0; i != host.length(); ++i) { const char ch = host[i]; - + if ('.' == ch) { // label separator if (!isAlnum || startLabel) // previous label must end in alnum return ParsedBadHost; startLabel = true; continue; } - + isAlnum = isalnum((const unsigned char)ch); - + if (startLabel) { // label is starting if (!isAlnum && '_' != ch) // new label must start with alnum or '_' return ParsedBadHost; @@ -36,7 +36,7 @@ namespace NUri { domainLevelOfUnderscore = domainLevel; continue; } - + if (isAlnum || '-' == ch) continue; @@ -46,11 +46,11 @@ namespace NUri { } return ParsedBadHost; - } - + } + if (0 < domainLevelOfUnderscore && domainLevel < 2 + domainLevelOfUnderscore) return ParsedBadHost; - + return ParsedOK; } @@ -65,40 +65,40 @@ namespace NUri { if (!scheme.empty()) { if (SetSchemeImpl(TSchemeInfo::Get(scheme)).Str.empty()) FldSet(FieldScheme, scheme); - } - + } + if (0 < defaultPort) // override the scheme's default port DefaultPort = static_cast<ui16>(defaultPort); - + char sport[6]; // enough for ui16 if (0 != port) { const size_t len = ToString(port, sport, sizeof(sport)); FldSet(FieldPort, TStringBuf(sport, len)); } - + FldTrySet(FieldHost, host); FldTrySet(FieldPath, path); FldTrySet(FieldQuery, query); Rewrite(); - } - + } + /********************************************************/ bool TUri::FldSetImpl( EField field, TStringBuf value, bool strconst, bool nocopy) { if (!FldIsValid(field)) return false; - + switch (field) { case FieldScheme: if (!SetScheme(TSchemeInfo::Get(value)).Str.empty()) return false; break; - + case FieldPort: Port = value.empty() ? 0 : FromString<ui16>(value); break; - + default: break; } @@ -107,18 +107,18 @@ namespace NUri { FldClr(field); return false; } - + if (strconst) { // string constants don't need to be saved in the buffer FldMarkClean(field); FldSetNoDirty(field, value); - return false; + return false; } - + if (nocopy) { FldSet(field, value); return true; } - + return FldTryCpy(field, value); } @@ -128,7 +128,7 @@ namespace NUri { do { if (!FldIsSet(field)) break; - + TStringBuf& fld = Fields[field]; if (fld.length() < value.length()) break; @@ -146,10 +146,10 @@ namespace NUri { FldMarkDirty(field); } - FldSetNoDirty(field, value); - return true; - } - + FldSetNoDirty(field, value); + return true; + } + /********************************************************/ void TUri::RewriteImpl() { size_t len = 0; @@ -169,7 +169,7 @@ namespace NUri { const EField fld = EField(i); if (!FldIsSet(fld)) continue; - + const char* beg = out.Buf(); const TStringBuf& val = Fields[fld]; out << val; @@ -178,12 +178,12 @@ namespace NUri { } Buffer = std::move(newbuf); } - + CheckMissingFields(); FieldsDirty = 0; - } - + } + void TUri::CheckMissingFields() { // if host is set but path is not... if (FldSetCmp(FlagPath | FlagHost, FlagHost)) @@ -197,7 +197,7 @@ namespace NUri { void TUri::Merge(const TUri& base, int correctAbs) { if (base.Scheme == SchemeUnknown) return; - + if (!base.IsValidGlobal()) return; @@ -207,11 +207,11 @@ namespace NUri { const bool noscheme = !selfscheme.IsInited(); if (!noscheme && !EqualNoCase(selfscheme, basescheme)) return; - + const ui32 cleanFields = ~FieldsDirty; do { static constexpr TStringBuf rootPath = "/"; - + if (noscheme) { if (!basescheme.empty()) { FldSetNoDirty(FieldScheme, basescheme); @@ -222,7 +222,7 @@ namespace NUri { Scheme = base.Scheme; DefaultPort = base.DefaultPort; } - + if (!IsNull(FlagHost)) break; // no merge @@ -232,15 +232,15 @@ namespace NUri { if (noscheme && IsNull(FlagQuery) && IsNull(FlagPath)) FldTrySet(FieldQuery, base); - + if (IsNull(FlagAuth) && !base.IsNull(FlagAuth)) { FldChkSet(FieldUser, base); FldChkSet(FieldPass, base); - } - + } + if (IsValidAbs()) break; - + TStringBuf p0 = base.GetField(FieldPath); if (!p0.IsInited()) p0 = rootPath; @@ -265,33 +265,33 @@ namespace NUri { else if (p1.empty() || '.' != p1[0]) pathop = false; out << p1; - + char* beg = out.Data(); char* end = beg + out.Filled(); if (pathop && !PathOperation(beg, end, correctAbs)) { Clear(); break; } - + // Needs immediate forced rewrite because of TTempBuf FldSetNoDirty(FieldPath, TStringBuf(beg, end)); RewriteImpl(); } while (false); - + CheckMissingFields(); - + // rewrite only if borrowed fields from base if (cleanFields & FieldsDirty) RewriteImpl(); } - + /********************************************************/ TUri::TLinkType TUri::Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase, long careFlags, ECharset enc) { // parse URL if (ParsedOK != ParseImpl(link, careFlags, 0, SchemeEmpty, enc)) return LinkIsBad; - + const TStringBuf& host = GetHost(); // merge with base URL @@ -343,7 +343,7 @@ namespace NUri { else len += v.length() + 1; } - } + } } return len; @@ -362,7 +362,7 @@ namespace NUri { if (!v.empty()) out << v << ':'; } - + TStringBuf host; if (flags & FlagHost) { const EField fldhost = @@ -373,11 +373,11 @@ namespace NUri { TStringBuf port; if ((flags & FlagPort) && 0 != Port && Port != DefaultPort) port = Fields[FieldPort]; - + if (host) { if (wantFlags & FlagScheme) out << "//"; - + if (flags & FlagAuth) { if (flags & FlagUser) { v = Fields[FieldUser]; @@ -392,7 +392,7 @@ namespace NUri { TEncoder::EncodeAll(out, v); } } - + out << '@'; } @@ -476,11 +476,11 @@ namespace NUri { return false; if (pathPtr == pathEnd) return true; - + if ((pathEnd - pathPtr) >= 2 && *(pathEnd - 2) == '/' && *(pathEnd - 1) == '.') { --pathEnd; } - + char* p_wr = pathEnd; int upCount = 0; diff --git a/library/cpp/uri/uri.h b/library/cpp/uri/uri.h index 3b6c19fe4a..c686e0c7ea 100644 --- a/library/cpp/uri/uri.h +++ b/library/cpp/uri/uri.h @@ -1,22 +1,22 @@ #pragma once -#include "common.h" -#include "encode.h" +#include "common.h" +#include "encode.h" #include <library/cpp/charset/doccodes.h> #include <util/generic/buffer.h> -#include <util/generic/ptr.h> -#include <util/generic/singleton.h> +#include <util/generic/ptr.h> +#include <util/generic/singleton.h> #include <util/generic/string.h> -#include <util/memory/alloc.h> +#include <util/memory/alloc.h> #include <util/stream/mem.h> -#include <util/stream/output.h> +#include <util/stream/output.h> #include <util/stream/str.h> #include <util/system/yassert.h> #include <cstdlib> -namespace NUri { +namespace NUri { /********************************************************/ class TUri : public TFeature, @@ -31,7 +31,7 @@ namespace NUri { LinkIsLocal, LinkIsGlobal }; - + private: TBuffer Buffer; TStringBuf Fields[FieldAllMAX]; @@ -57,7 +57,7 @@ namespace NUri { Scheme = SchemeEmpty; FieldsDirty = 0; } - + void CopyData(const TUri& url) { FieldsSet = url.FieldsSet; Port = url.Port; @@ -65,51 +65,51 @@ namespace NUri { Scheme = url.Scheme; FieldsDirty = url.FieldsDirty; } - + void CopyImpl(const TUri& url) { for (int i = 0; i < FieldAllMAX; ++i) Fields[i] = url.Fields[i]; - + RewriteImpl(); } - + private: static ui32 FldFlag(EField fld) { return 1 << fld; } - + public: static bool FldIsValid(EField fld) { return 0 <= fld && FieldAllMAX > fld; } - + bool FldSetCmp(ui32 chk, ui32 exp) const { return (FieldsSet & chk) == exp; } - + bool FldSetCmp(ui32 chk) const { return FldSetCmp(chk, chk); } - + bool FldIsSet(EField fld) const { return !FldSetCmp(FldFlag(fld), 0); } - + private: void FldMarkSet(EField fld) { FieldsSet |= FldFlag(fld); } - + void FldMarkUnset(EField fld) { FieldsSet &= ~FldFlag(fld); } - + // use when we know the field is dirty or RewriteImpl will be called void FldSetNoDirty(EField fld, const TStringBuf& value) { Fields[fld] = value; FldMarkSet(fld); } - + void FldSet(EField fld, const TStringBuf& value) { FldSetNoDirty(fld, value); FldMarkDirty(fld); @@ -118,7 +118,7 @@ namespace NUri { const TStringBuf& FldGet(EField fld) const { return Fields[fld]; } - + private: /// depending on value, clears or sets it void FldChkSet(EField fld, const TStringBuf& value) { @@ -130,7 +130,7 @@ namespace NUri { void FldChkSet(EField fld, const TUri& other) { FldChkSet(fld, other.GetField(fld)); } - + /// set only if initialized bool FldTrySet(EField fld, const TStringBuf& value) { const bool ok = value.IsInited(); @@ -145,48 +145,48 @@ namespace NUri { private: /// copies the value if it fits bool FldTryCpy(EField fld, const TStringBuf& value); - + // main method: sets the field value, possibly copies, etc. bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false); - + public: // clear a field void FldClr(EField fld) { Fields[fld].Clear(); FldMarkUnset(fld); FldMarkClean(fld); } - + bool FldTryClr(EField field) { const bool ok = FldIsSet(field); if (ok) FldClr(field); return ok; } - + public: // set a field value: might leave state dirty and require a Rewrite() // copies if fits and not dirty, sets and marks dirty otherwise bool FldMemCpy(EField field, const TStringBuf& value) { return FldSetImpl(field, value, false); } - + // uses directly, marks dirty /// @note client MUST guarantee value will be alive until Rewrite is called bool FldMemSet(EField field, const TStringBuf& value) { return FldSetImpl(field, value, false, true); } - + // uses directly, doesn't mark dirty (value scope exceeds "this") bool FldMemUse(EField field, const TStringBuf& value) { return FldSetImpl(field, value, true); } - + // uses directly, doesn't mark dirty template <size_t size> bool FldMemSet(EField field, const char (&value)[size]) { static_assert(size > 0); return FldSetImpl(field, TStringBuf(value, size - 1), true); } - + // duplicate one field to another bool FldDup(EField src, EField dst) { if (!FldIsSet(src) || !FldIsValid(dst)) @@ -198,7 +198,7 @@ namespace NUri { FldMarkClean(dst); return true; } - + // move one field to another bool FldMov(EField src, EField dst) { if (!FldDup(src, dst)) @@ -206,47 +206,47 @@ namespace NUri { FldClr(src); return true; } - + private: bool IsInBuffer(const char* buf) const { return buf >= Buffer.data() && buf < Buffer.data() + Buffer.size(); } - + public: bool FldIsDirty() const { return 0 != FieldsDirty; } - + bool FldIsDirty(EField fld) const { return 0 != (FieldsDirty & FldFlag(fld)); } - + private: void FldMarkDirty(EField fld) { FieldsDirty |= FldFlag(fld); } - + void FldMarkClean(EField fld) { FieldsDirty &= ~FldFlag(fld); } - + void RewriteImpl(); - + public: static TState::EParsed CheckHost(const TStringBuf& host); - + // convert a [potential] IDN to ascii static TMallocPtr<char> IDNToAscii(const wchar32* idna); static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8); - + // convert hosts with percent-encoded or extended chars - + // returns non-empty string if host can be converted to ASCII with given parameters static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8); - + // returns host if already ascii, or non-empty if it can be converted static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8); - + public: explicit TUri(unsigned defaultPort = 0) : FieldsSet(0) @@ -256,9 +256,9 @@ namespace NUri { , FieldsDirty(0) { } - + TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0); - + TUri(const TUri& url) : FieldsSet(url.FieldsSet) , Port(url.Port) @@ -283,8 +283,8 @@ namespace NUri { void Clear() { Dealloc(); ClearImpl(); - } - + } + ui32 GetFieldMask() const { return FieldsSet; } @@ -292,15 +292,15 @@ namespace NUri { ui32 GetUrlFieldMask() const { return GetFieldMask() & FlagUrlFields; } - + ui32 GetDirtyMask() const { return FieldsDirty; } - + void CheckMissingFields(); - + // Process methods - + void Rewrite() { if (FldIsDirty()) RewriteImpl(); @@ -310,7 +310,7 @@ namespace NUri { TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty); TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8); - + public: TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) { const TState::EParsed ret = AssignImpl(parser, defscheme); @@ -318,40 +318,40 @@ namespace NUri { Rewrite(); return ret; } - + TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); if (ParsedOK == ret) Rewrite(); return ret; } - + // parses absolute URIs // prepends default scheme (unless unknown) if URI has none TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8); - + TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) { return ParseAbsUri(url, flags, maxlen, SchemeHTTP, enc); } - + TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8); - + TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) { return ParseUri(url, flags); } - + TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8); - + TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) { const TState::EParsed result = Parse(url, flags, base_url, maxlen, enc); return ParsedOK != result || IsValidGlobal() ? result : ParsedBadFormat; } - + // correctAbs works with head "/.." portions: // 1 - reject URL // 0 - keep portions // -1 - ignore portions - + void Merge(const TUri& base, int correctAbs = -1); TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), long careFlags = FeaturesDefault, ECharset enc = CODES_UTF8); @@ -365,36 +365,36 @@ namespace NUri { protected: size_t PrintSize(ui32 flags) const; - + // Output method, prints to stream IOutputStream& PrintImpl(IOutputStream& out, int flags) const; - + char* PrintImpl(char* str, size_t size, int flags) const { TMemoryOutput out(str, size); PrintImpl(out, flags) << '\0'; return str; } - + static bool IsAbsPath(const TStringBuf& path) { return 1 <= path.length() && path[0] == '/'; } - + bool IsAbsPathImpl() const { return IsAbsPath(GetField(FieldPath)); } - + public: // Output method, prints to stream IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const { return PrintImpl(out, PrintFlags(flags)); } - + // Output method, print to str, allocate memory if str is NULL // Should be deprecated char* Print(char* str, size_t size, int flags = FlagUrlFields) const { return nullptr == str ? Serialize(flags) : Serialize(str, size, flags); } - + char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const { Y_ASSERT(str); flags = PrintFlags(flags); @@ -407,7 +407,7 @@ namespace NUri { const size_t size = PrintSize(flags) + 1; return PrintImpl(static_cast<char*>(malloc(size)), size, flags); } - + // Output method to str void Print(TString& str, int flags = FlagUrlFields) const { flags = PrintFlags(flags); @@ -415,7 +415,7 @@ namespace NUri { TStringOutput out(str); PrintImpl(out, flags); } - + TString PrintS(int flags = FlagUrlFields) const { TString str; Print(str, flags); @@ -429,7 +429,7 @@ namespace NUri { TString PrintHostS() const { return PrintS((Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort); } - + // Info methods int Compare(const TUri& A, int flags = FlagUrlFields) const; @@ -438,7 +438,7 @@ namespace NUri { const TStringBuf& GetField(EField fld) const { return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>(); } - + ui16 GetPort() const { return 0 == Port ? DefaultPort : Port; } @@ -454,14 +454,14 @@ namespace NUri { bool UseHostAscii() { return FldMov(FieldHostAscii, FieldHost); } - + TScheme::EKind GetScheme() const { return Scheme; } const TSchemeInfo& GetSchemeInfo() const { return TSchemeInfo::Get(Scheme); } - + bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const { return !FldSetCmp(flags); } @@ -475,7 +475,7 @@ namespace NUri { return false; return IsAbsPathImpl(); } - + bool IsValidGlobal() const { if (IsNull(FlagScheme | FlagHost)) return false; @@ -492,13 +492,13 @@ namespace NUri { bool IsOpaque() const { return IsRootless(); } - + // Inline helpers TUri& operator=(const TUri& u) { Copy(u); return *this; } - + bool operator!() const { return IsNull(); } @@ -554,14 +554,14 @@ namespace NUri { static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, long flags = FeaturesEncodeDecode) { return ReEncodeField(out, val, FieldAllMAX, flags); } - + static int PathOperationFlag(const TParseFlags& flags) { return flags & FeaturePathDenyRootParent ? 1 : flags & FeaturePathStripRootParent ? -1 : 0; } static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs); - + private: const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) { Scheme = info.Kind; @@ -573,7 +573,7 @@ namespace NUri { const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) { return SetSchemeImpl(TSchemeInfo::Get(scheme)); } - + public: const TSchemeInfo& SetScheme(const TSchemeInfo& info) { SetSchemeImpl(info); @@ -585,7 +585,7 @@ namespace NUri { return SetScheme(TSchemeInfo::Get(scheme)); } }; - + class TUriUpdate { TUri& Uri_; @@ -597,30 +597,30 @@ namespace NUri { ~TUriUpdate() { Uri_.Rewrite(); } - + public: bool Set(TField::EField field, const TStringBuf& value) { return Uri_.FldMemSet(field, value); } - + template <size_t size> bool Set(TField::EField field, const char (&value)[size]) { return Uri_.FldMemSet(field, value); } - + void Clr(TField::EField field) { Uri_.FldClr(field); } }; - + const char* LinkTypeToString(const TUri::TLinkType& t); - -} + +} Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) { - url.Print(out); -} - + url.Print(out); +} + Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) { - out << NUri::LinkTypeToString(t); + out << NUri::LinkTypeToString(t); } diff --git a/library/cpp/uri/uri_ut.cpp b/library/cpp/uri/uri_ut.cpp index 2ebd83fc93..2a1f689e46 100644 --- a/library/cpp/uri/uri_ut.cpp +++ b/library/cpp/uri/uri_ut.cpp @@ -1,11 +1,11 @@ -#include "uri_ut.h" -#include "other.h" +#include "uri_ut.h" +#include "other.h" #include "qargs.h" #include <library/cpp/html/entity/htmlentity.h> -#include <util/system/maxlen.h> +#include <util/system/maxlen.h> -namespace NUri { +namespace NUri { Y_UNIT_TEST_SUITE(URLTest) { static const char* urls[] = { "http://a/b/c/d;p?q#r", @@ -59,7 +59,7 @@ namespace NUri { "%20y", "http://a/b/c/%20y", // "%2zy", "http://a/b/c/%2zy", nullptr}; - + Y_UNIT_TEST(test_httpURL) { TUri rel, base, abs; TState::EParsed er = base.Parse(urls[0]); @@ -90,7 +90,7 @@ namespace NUri { UNIT_ASSERT_EQUAL_C(rel, abs, errbuf); } } - + Y_UNIT_TEST(test_Schemes) { TUri url; UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK); @@ -113,7 +113,7 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.Parse("httpsssss://www.ya.ru", TFeature::FeaturesDefault | TFeature::FeatureSchemeFlexible), TState::ParsedOK); UNIT_ASSERT_EQUAL(url.GetScheme(), TScheme::SchemeUnknown); } - + struct Link4Norm { const char* const base; const char* const link; @@ -296,7 +296,7 @@ namespace NUri { URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "host:8080"); } - + Y_UNIT_TEST(test06) { TTest test = { "http://user:pass@host?q", TFeature::FeaturesAll, TState::ParsedOK, "http", "user", "pass", "host", 80, "/", "q", ""}; @@ -306,7 +306,7 @@ namespace NUri { UNIT_ASSERT(!url.FldIsDirty()); UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldScheme), "https"); UNIT_ASSERT_VALUES_EQUAL(url.GetPort(), 443); - + // test copying TUri url2(url); // make sure strings are equal... @@ -319,13 +319,13 @@ namespace NUri { url2.GetField(TField::FieldUser)); // and urls compare the same URL_EQ(url, url2); - + // cause a dirty field url.FldMemSet(TField::FieldUser, "use"); // it is now shorter UNIT_ASSERT(!url.FldIsDirty()); url.FldMemSet(TField::FieldUser, TStringBuf("user")); UNIT_ASSERT(url.FldIsDirty()); - + // copy again url2 = url; UNIT_ASSERT(url.FldIsDirty()); @@ -340,7 +340,7 @@ namespace NUri { url.GetField(TField::FieldUser).data(), url2.GetField(TField::FieldUser).data()); URL_EQ(url, url2); - + // make query empty url.FldMemSet(TField::FieldQuery, ""); url2 = url; @@ -358,7 +358,7 @@ namespace NUri { url2.FldMemSet(TField::FieldPort, "443"); URL_EQ(url, url2); } - + Y_UNIT_TEST(test07) { { TTest test = { @@ -372,7 +372,7 @@ namespace NUri { UNIT_ASSERT_EQUAL(TScheme::SchemeHTTP, url.GetScheme()); UNIT_ASSERT_EQUAL("http", url.GetField(TField::FieldScheme)); } - + { const TString scheme = "http"; const TString host = "host.com"; @@ -384,7 +384,7 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), urlstr + "/"); } } - + Y_UNIT_TEST(test08) { { TTest test = { @@ -471,7 +471,7 @@ namespace NUri { URL_TEST(url, test); } } - + Y_UNIT_TEST(test09) { { TTest test = { @@ -492,7 +492,7 @@ namespace NUri { URL_TEST(url, test); } } - + Y_UNIT_TEST(test10) { // test some escaping madness, note the ehost vs host { @@ -504,7 +504,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TString host = "%D0%BF%D1%80%D0%B5%D0%B7%D0%B8%D0%B4%D0%B5%D0%BD%D1%82.%D1%80%D1%84"; const TString urlstr = TString::Join("http://", host, "/"); @@ -513,7 +513,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TString host = "Фilip.ru"; TString ehost = "%D0%A4ilip.ru"; @@ -523,7 +523,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TString host = "%D0%A4ilip.ru"; const TString urlstr = TString::Join("http://", host); @@ -532,7 +532,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TString host = "Filip%90.rЯ"; TString ehost = "Filip%90.r%D0%AF"; @@ -542,7 +542,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TString host = "Filip%90.r%D0%AF"; const TString urlstr = TString::Join(host, ":8080"); @@ -560,7 +560,7 @@ namespace NUri { TUri url; URL_TEST(url, test); } - + { TTest test = { "HtTp://HoSt/%50a%54h/?Query#Frag", TParseFlags(TFeature::FeaturesAll | TFeature::FeatureNoRelPath, TFeature::FeatureToLower), TState::ParsedOK, "http", "", "", "host", 80, "/path/", "query", "frag"}; @@ -572,62 +572,62 @@ namespace NUri { Y_UNIT_TEST(test12) { // test characters which are not always safe { -#define RAW "/:" -#define DEC "%2F:" -#define ENC "%2F%3A" +#define RAW "/:" +#define DEC "%2F:" +#define ENC "%2F%3A" TTest test = { "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); -#undef RAW -#undef DEC -#undef ENC +#undef RAW +#undef DEC +#undef ENC } { -#define RAW "?@" -#define DEC "%3F@" -#define ENC "%3F%40" +#define RAW "?@" +#define DEC "%3F@" +#define ENC "%3F%40" TTest test = { "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" DEC, RAW, RAW}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" DEC "?" RAW "#" RAW); -#undef RAW -#undef DEC -#undef ENC +#undef RAW +#undef DEC +#undef ENC } { -#define RAW "%&;=" -#define DEC "%25&;=" -#define ENC "%25%26%3B%3D" +#define RAW "%&;=" +#define DEC "%25&;=" +#define ENC "%25%26%3B%3D" TTest test = { "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, ENC, ENC}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC); -#undef RAW -#undef DEC -#undef ENC +#undef RAW +#undef DEC +#undef ENC } { -#define RAW "!$'()*," -#define DEC "!$%27()*," -#define ENC "%21%24%27%28%29%2A%2C" +#define RAW "!$'()*," +#define DEC "!$%27()*," +#define ENC "%21%24%27%28%29%2A%2C" TTest test = { "http://" ENC ":" ENC "@host/" ENC "?" ENC "#" ENC, TFeature::FeaturesAll, TState::ParsedOK, "http", RAW, RAW, "host", 80, "/" ENC, DEC, DEC}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" ENC ":" ENC "@host/" ENC "?" DEC "#" DEC); -#undef RAW -#undef DEC -#undef ENC +#undef RAW +#undef DEC +#undef ENC } { -#define DEC "Череповец。рф" -#define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" -// punycode corresponds to lowercase -#define PNC "xn--b1afab7bff7cb.xn--p1ai" +#define DEC "Череповец。рф" +#define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" +// punycode corresponds to lowercase +#define PNC "xn--b1afab7bff7cb.xn--p1ai" TTest test = { "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC}; TUri url; @@ -635,23 +635,23 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC); -#undef PNC -#undef DEC -#undef ENC +#undef PNC +#undef DEC +#undef ENC } { -#define DEC "Череповец。рф" -#define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" -// punycode corresponds to lowercase -#define PNC "xn--b1afab7bff7cb.xn--p1ai" +#define DEC "Череповец。рф" +#define ENC "%D0%A7%D0%B5%D1%80%D0%B5%D0%BF%D0%BE%D0%B2%D0%B5%D1%86%E3%80%82%D1%80%D1%84" +// punycode corresponds to lowercase +#define PNC "xn--b1afab7bff7cb.xn--p1ai" TTest test = { "http://" DEC "/" DEC "?" DEC "#" DEC, TParseFlags(TFeature::FeaturesRobot | TFeature::FeatureEncodeExtendedASCII), TState::ParsedOK, "http", "", "", PNC, 80, "/" ENC, ENC, ENC}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" PNC "/" ENC "?" ENC "#" ENC); -#undef PNC -#undef DEC -#undef ENC +#undef PNC +#undef DEC +#undef ENC } { #define DEC "независимая-экспертиза-оценка-ущерба-авто-дтп.рф" @@ -665,7 +665,7 @@ namespace NUri { #undef DEC } } - + Y_UNIT_TEST(testFlexibleAuthority) { TUri uri; UNIT_ASSERT_EQUAL(uri.Parse("http://hello_world", TFeature::FeatureCheckHost), TState::ParsedBadHost); @@ -728,21 +728,21 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host/pa%thA"); } } - + Y_UNIT_TEST(testIPv6) { { -#define RAW "[1080:0:0:0:8:800:200C:417A]" -#define DEC "[1080:0:0:0:8:800:200c:417a]" +#define RAW "[1080:0:0:0:8:800:200C:417A]" +#define DEC "[1080:0:0:0:8:800:200c:417a]" TTest test = { "http://" RAW "/" RAW "?" RAW "#" RAW, TParseFlags(TFeature::FeaturesAll), TState::ParsedOK, "http", "", "", DEC, 80, "/" RAW, RAW, RAW}; TUri url; URL_TEST(url, test); UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" RAW "?" RAW "#" RAW); -#undef DEC -#undef RAW +#undef DEC +#undef RAW } } - + Y_UNIT_TEST(testEscapedFragment) { { TTest test = { @@ -759,7 +759,7 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://host.com/#!a=b&c=d#e+g%25"); } } - + Y_UNIT_TEST(testReEncode) { { TStringStream out; @@ -767,7 +767,7 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(out.Str(), "foo%20bar"); } } - + static const TStringBuf NonRfcUrls[] = { "http://deshevle.ru/price/price=&SrchTp=1&clID=24&BL=SrchTp=0|clID=24&frmID=75&SortBy=P&PreSort=&NmDir=0&VndDir=0&PrDir=0&SPP=44", "http://secure.rollerwarehouse.com/skates/aggressive/skates/c/11[03]/tx/$$$+11[03][a-z]", @@ -781,7 +781,7 @@ namespace NUri { "http://www.trinity.by/?section_id=46,47,48&cat=1&filters[]=2^_^Sony", "http://translate.yandex.net/api/v1/tr.json/translate?lang=en-ru&text=>", nullptr}; - + Y_UNIT_TEST(test_NonRfcUrls) { TUri url; const long flags = TFeature::FeaturesRobot; @@ -792,11 +792,11 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(TState::ParsedOK, url.Parse(buf, flags)); } } - + static const TStringBuf CheckParseException[] = { "http://www.'>'.com/?.net/", nullptr}; - + Y_UNIT_TEST(test_CheckParseException) { TUri url; const long flags = TFeature::FeaturesRobot | TFeature::FeaturesEncode; @@ -816,8 +816,8 @@ namespace NUri { } ythrow yexception() << "failed to parse URL [" << buf << "]: " << what; } - } - + } + Y_UNIT_TEST(test_PrintPort) { TUri uri; { @@ -900,7 +900,7 @@ namespace NUri { UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldPath), "/path"); } } - + Y_UNIT_TEST_SUITE(TInvertDomainTest) { Y_UNIT_TEST(TestInvert) { TString a; @@ -925,24 +925,24 @@ namespace NUri { TString h("www.yandex.ru:8080/redir.pl?url=https://google.com/"); UNIT_ASSERT_EQUAL(InvertDomain(h), "ru.yandex.www:8080/redir.pl?url=https://google.com/"); } - } - + } + TQueryArg::EProcessed ProcessQargs(TString url, TString& processed, TQueryArgFilter filter = 0, void* filterData = 0) { TUri uri; uri.Parse(url, NUri::TFeature::FeaturesRecommended); - + TQueryArgProcessing processing(TQueryArg::FeatureSortByName | (filter ? TQueryArg::FeatureFilter : 0) | TQueryArg::FeatureRewriteDirty, filter, filterData); auto result = processing.Process(uri); processed = uri.PrintS(); return result; } - + TString SortQargs(TString url) { TString r; ProcessQargs(url, r); return r; } - + bool QueryArgsFilter(const TQueryArg& arg, void* filterData) { const char* skipName = static_cast<const char*>(filterData); return arg.Name != skipName; diff --git a/library/cpp/uri/uri_ut.h b/library/cpp/uri/uri_ut.h index f8ac6e4092..4df9af6283 100644 --- a/library/cpp/uri/uri_ut.h +++ b/library/cpp/uri/uri_ut.h @@ -1,10 +1,10 @@ -#pragma once - -#include "uri.h" - +#pragma once + +#include "uri.h" + #include <library/cpp/testing/unittest/registar.h> - -namespace NUri { + +namespace NUri { struct TTest { TStringBuf Val; TParseFlags Flags; @@ -18,19 +18,19 @@ namespace NUri { TStringBuf Query; TStringBuf Frag; }; - -} - -#define URL_MSG(url1, url2, cmp) \ + +} + +#define URL_MSG(url1, url2, cmp) \ (TString("[") + url1.PrintS() + ("] " cmp " [") + url2.PrintS() + "]") -#define URL_EQ(url1, url2) \ - UNIT_ASSERT_EQUAL_C(url, url2, URL_MSG(url1, url2, "!=")) -#define URL_NEQ(url1, url2) \ - UNIT_ASSERT_UNEQUAL_C(url, url2, URL_MSG(url1, url2, "==")) - -#define CMP_FLD(url, test, fld) \ +#define URL_EQ(url1, url2) \ + UNIT_ASSERT_EQUAL_C(url, url2, URL_MSG(url1, url2, "!=")) +#define URL_NEQ(url1, url2) \ + UNIT_ASSERT_UNEQUAL_C(url, url2, URL_MSG(url1, url2, "==")) + +#define CMP_FLD(url, test, fld) \ UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::Field##fld), test.fld) - + #define CMP_URL(url, test) \ do { \ CMP_FLD(url, test, Scheme); \ @@ -42,7 +42,7 @@ namespace NUri { CMP_FLD(url, test, Query); \ CMP_FLD(url, test, Frag); \ } while (false) - + #define URL_TEST_ENC(url, test, enc) \ do { \ TState::EParsed st = url.ParseUri(test.Val, test.Flags, 0, enc); \ @@ -76,6 +76,6 @@ namespace NUri { CMP_URL(_url, test2); \ UNIT_ASSERT_VALUES_EQUAL(url.GetUrlFieldMask(), _url.GetUrlFieldMask()); \ } while (false) - -#define URL_TEST(url, test) \ - URL_TEST_ENC(url, test, CODES_UTF8) + +#define URL_TEST(url, test) \ + URL_TEST_ENC(url, test, CODES_UTF8) diff --git a/library/cpp/uri/ya.make b/library/cpp/uri/ya.make index 8fc808a6af..03f3aa6a0f 100644 --- a/library/cpp/uri/ya.make +++ b/library/cpp/uri/ya.make @@ -1,30 +1,30 @@ LIBRARY() - + OWNER( mvel g:base ) -SRCS( - assign.cpp - common.cpp - encode.cpp +SRCS( + assign.cpp + common.cpp + encode.cpp http_url.h location.cpp - other.cpp - parse.cpp + other.cpp + parse.cpp qargs.cpp - uri.cpp + uri.cpp encodefsm.rl6 parsefsm.rl6 -) - -PEERDIR( - contrib/libs/libidn +) + +PEERDIR( + contrib/libs/libidn library/cpp/charset -) - -END() +) + +END() RECURSE( benchmark |