diff options
author | socialgraph <socialgraph@yandex-team.com> | 2022-08-03 13:01:16 +0300 |
---|---|---|
committer | socialgraph <socialgraph@yandex-team.com> | 2022-08-03 13:01:16 +0300 |
commit | 15c8b667f31654db48aa974062cc70aade3e25e3 (patch) | |
tree | 53a3ac13e2e684fc5d5eb8fbffcd4586613cc496 /library/cpp/uri | |
parent | cb684f6475d0ce37cbb8ba4123d191fd5e877161 (diff) | |
download | ydb-15c8b667f31654db48aa974062cc70aade3e25e3.tar.gz |
refactor assign
Чтобы было проще работать с кодом для задачи, провел рефакторинг кода.
- избавился от while(false)
- избавился от излишних сокращений в названии локальных переменных
- добавил фигурные скобки
Diffstat (limited to 'library/cpp/uri')
-rw-r--r-- | library/cpp/uri/assign.cpp | 427 |
1 files changed, 220 insertions, 207 deletions
diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp index 224b0ff6ef..97122c445a 100644 --- a/library/cpp/uri/assign.cpp +++ b/library/cpp/uri/assign.cpp @@ -16,8 +16,9 @@ namespace NUri { // proper stringprep and splitting on dot-equivalent characters char* buf; static_assert(sizeof(*idna) == sizeof(ui32), "fixme"); - if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*)idna, &buf, 0)) + if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*) idna, &buf, 0)) { buf = nullptr; + } return buf; } @@ -32,13 +33,13 @@ namespace NUri { } TStringBuf TUri::HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc) { - TStringBuf outhost; // store the result here before returning it, to get RVO + TStringBuf outHost; // store the result here before returning it, to get RVO size_t buflen = 0; - if (hasExtended && !allowIDN) - return outhost; // definitely can't convert - + if (hasExtended && !allowIDN) { + return outHost; // definitely can't convert + } // charset-recode: RFC 3986, 3.2.2, requires percent-encoded non-ASCII // chars in reg-name to be UTF-8 so convert to UTF-8 prior to decoding const bool recoding = CODES_UTF8 != enc && hasExtended; @@ -46,8 +47,9 @@ namespace NUri { size_t nrd, nwr; buflen = host.length() * 4; buf.Reset(static_cast<char*>(y_allocate(buflen))); - if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr)) - return outhost; + if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr)) { + return outHost; + } host = TStringBuf(buf.Get(), nwr); } @@ -63,15 +65,16 @@ namespace NUri { hasExtended = 0 != (outFlags & FeatureEncodeExtendedASCII); // check again - if (hasExtended && !allowIDN) - return outhost; + if (hasExtended && !allowIDN) { + return outHost; + } host = out.Str(); // convert to punycode if needed if (!hasExtended) { - outhost = host; - return outhost; + outHost = host; + return outHost; } 
TMallocPtr<char> puny; @@ -82,342 +85,352 @@ namespace NUri { if (!puny) { // XXX: try user charset unless UTF8 or converted to it - if (CODES_UTF8 == enc || recoding) - return outhost; + if (CODES_UTF8 == enc || recoding) { + return outHost; + } try { puny = IDNToAscii(host, enc); } catch (const yexception& /* exc */) { - return outhost; + return outHost; + } + if (!puny) { + return outHost; } - if (!puny) - return outhost; } buf = puny; - outhost = buf.Get(); + outHost = buf.Get(); - return outhost; + return outHost; } TStringBuf TUri::HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc) { // find what we have long haveFlags = 0; - for (size_t i = 0; i != host.length(); ++i) + for (size_t i = 0; i != host.length(); ++i) { haveFlags |= TEncoder::GetFlags(host[i]).FeatFlags; - + } // interested in encoded characters or (if IDN is allowed) extended ascii - TStringBuf outhost; + TStringBuf outHost; const bool haveExtended = haveFlags & FeatureEncodeExtendedASCII; if (!haveExtended || allowIDN) { - if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) - outhost = host; - else - outhost = HostToAscii(host, buf, haveExtended, allowIDN, enc); + if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) { + outHost = host; + } else { + outHost = HostToAscii(host, buf, haveExtended, allowIDN, enc); + } } - - return outhost; + return outHost; } - static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField fld, const TStringBuf& val, long flags) { - if (val.empty()) + static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField field, const TStringBuf& value, long flags) { + if (value.empty()) { return false; - if (flags & TFeature::FeaturesAllEncoder) - TUri::ReEncodeField(out, val, fld, flags); - else - out << val; + } + if (flags & TFeature::FeaturesAllEncoder) { + TUri::ReEncodeField(out, value, field, flags); + } else { + out << value; + } return true; } - TState::EParsed TUri::AssignImpl(const TParser& parser, 
TScheme::EKind defscheme) { + TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defaultScheme) { Clear(); - TState::EParsed ret = parser.State; - if (ParsedBadFormat <= ret) - return ret; - + TState::EParsed status = parser.State; + if (ParsedBadFormat <= status) { + return status; + } const TSection& scheme = parser.Get(FieldScheme); const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme); // set the scheme always if available - if (schemeInfo.Str.empty() && scheme.IsSet()) + if (schemeInfo.Str.empty() && scheme.IsSet()) { FldSet(FieldScheme, scheme.Get()); - - if (ParsedOK != ret) - return ret; - + } + if (ParsedOK != status) { + return status; + } size_t buflen = 0; // special processing for fields const bool convertIDN = parser.Flags & FeatureConvertHostIDN; long flags = parser.Flags.Allow; - if (convertIDN) + if (convertIDN) { flags |= FeatureAllowHostIDN | FeatureCheckHost; + } // process non-ASCII host for punycode - TMallocPtr<char> hostptr; - TStringBuf hostascii; // empty: use host field; non-empty: ascii - bool hostConverted = false; // hostascii is empty or the original + TMallocPtr<char> hostPtr; + TStringBuf hostAsciiBuf; + bool inHostNonAsciiChars = false; + const TSection& host = parser.Get(FieldHost); if (host.IsSet() && !FldIsSet(FieldHost)) { const bool allowIDN = (flags & FeatureAllowHostIDN); - const TStringBuf hostbuf = host.Get(); + const TStringBuf hostBuf = host.Get(); // if we know we have and allow extended-ASCII chars, no need to check further - if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) - hostascii = HostToAscii(hostbuf, hostptr, true, true, parser.Enc); - else - hostascii = HostToAscii(hostbuf, hostptr, allowIDN, parser.Enc); - - if (hostascii.empty()) - ret = ParsedBadHost; // exists but cannot be converted - else if (hostbuf.data() != hostascii.data()) { - hostConverted = true; - buflen += 1 + hostascii.length(); - if (convertIDN) + if (allowIDN && 
(host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) { + hostAsciiBuf = HostToAscii(hostBuf, hostPtr, true, true, parser.Enc); + } else { + hostAsciiBuf = HostToAscii(hostBuf, hostPtr, allowIDN, parser.Enc); + } + + if (hostAsciiBuf.empty()) { + status = ParsedBadHost; // exists but cannot be converted + + } else if (hostBuf.data() != hostAsciiBuf.data()) { + inHostNonAsciiChars = true; + + buflen += 1 + hostAsciiBuf.length(); + if (convertIDN) { FldMarkSet(FieldHost); // so that we don't process host below + } } } // add unprocessed fields - for (int idx = 0; idx < FieldUrlMAX; ++idx) { - const EField fld = EField(idx); - const TSection& section = parser.Get(fld); - if (section.IsSet() && !FldIsSet(fld)) + for (int i = 0; i < FieldUrlMAX; ++i) { + const EField field = EField(i); + const TSection& section = parser.Get(field); + + if (section.IsSet() && !FldIsSet(field)) { buflen += 1 + section.EncodedLen(); // includes null + } + } + if (0 == buflen) { // no more sections set? + return status; } - if (0 == buflen) // no more sections set? - return ret; // process #! fragments // https://developers.google.com/webmasters/ajax-crawling/docs/specification - static const TStringBuf escFragPrefix(TStringBuf("_escaped_fragment_=")); + static const TStringBuf escapedFragment(TStringBuf("_escaped_fragment_=")); - bool encHashBangFrag = false; - TStringBuf qryBeforeEscapedFragment; - TStringBuf qryEscapedFragment; - do { - if (FldIsSet(FieldFrag) || FldIsSet(FieldQuery)) - break; + bool encodeHashBang = false; + TStringBuf queryBeforeEscapedFragment; + TStringBuf queryEscapedFragment; + if (!FldIsSet(FieldFrag) && !FldIsSet(FieldQuery)) { const TSection& frag = parser.Get(FieldFrag); + if (frag.IsSet()) { - if (0 == (parser.Flags & FeatureHashBangToEscapedFragment)) - break; - const TStringBuf fragbuf = frag.Get(); - if (fragbuf.empty() || '!' != fragbuf[0]) - break; - encHashBangFrag = true; - // '!' 
will make space for '&' or '\0' if needed - buflen += escFragPrefix.length(); - buflen += 2 * fragbuf.length(); // we don't know how many will be encoded + if (0 != (parser.Flags & FeatureHashBangToEscapedFragment)) { + const TStringBuf fragBuf = frag.Get(); + if (!fragBuf.empty() && '!' == fragBuf[0]) { + encodeHashBang = true; + // '!' will make space for '&' or '\0' if needed + buflen += escapedFragment.length(); + buflen += 2 * fragBuf.length(); // we don't know how many will be encoded + } + } } else { - const TSection& qry = parser.Get(FieldQuery); - if (!qry.IsSet()) - break; - // FeatureHashBangToEscapedFragment has preference - if (FeatureEscapedToHashBangFragment != (parser.Flags & FeaturesEscapedFragment)) - break; - qry.Get().RSplit('&', qryBeforeEscapedFragment, qryEscapedFragment); - if (!qryEscapedFragment.StartsWith(escFragPrefix)) { - qryEscapedFragment.Clear(); - break; + const TSection& query = parser.Get(FieldQuery); + if (query.IsSet()) { + // FeatureHashBangToEscapedFragment has preference + if (FeatureEscapedToHashBangFragment == (parser.Flags & FeaturesEscapedFragment)) { + const TStringBuf queryBuf = query.Get(); + + queryBuf.RSplit('&', queryBeforeEscapedFragment, queryEscapedFragment); + if (queryEscapedFragment.StartsWith(escapedFragment)) { + queryEscapedFragment.Skip(escapedFragment.length()); + buflen += 2; // for '!' and '\0' in fragment + buflen -= escapedFragment.length(); + } else { + queryEscapedFragment.Clear(); + } + } } - qryEscapedFragment.Skip(escFragPrefix.length()); - buflen += 2; // for '!' 
and '\0' in fragment - buflen -= escFragPrefix.length(); } - } while (false); + } // now set all fields prior to validating Alloc(buflen); TMemoryWriteBuffer out(Buffer.data(), Buffer.size()); - for (int idx = 0; idx < FieldUrlMAX; ++idx) { - const EField fld = EField(idx); + for (int i = 0; i < FieldUrlMAX; ++i) { + const EField field = EField(i); - const TSection& section = parser.Get(fld); - if (!section.IsSet() || FldIsSet(fld)) + const TSection& section = parser.Get(field); + if (!section.IsSet() || FldIsSet(field)) { continue; - - if (FieldQuery == fld && encHashBangFrag) + } + if (FieldQuery == field && encodeHashBang) { continue; - - if (FieldFrag == fld && qryEscapedFragment.IsInited()) + } + if (FieldFrag == field && queryEscapedFragment.IsInited()) { continue; + } char* beg = out.Buf(); - TStringBuf val = section.Get(); + TStringBuf value = section.Get(); long careFlags = section.GetFlagsEncode(); - switch (fld) { - default: - break; - - case FieldQuery: - if (qryEscapedFragment.IsInited()) { - const EField dstfld = FieldFrag; // that's where we will store - out << '!'; - if (!qryEscapedFragment.empty()) - ReEncodeToField(out, qryEscapedFragment, fld, FeatureDecodeANY | careFlags, dstfld, FeatureDecodeANY | parser.GetFieldFlags(dstfld)); - FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); - if (qryBeforeEscapedFragment.empty()) - continue; - out << '\0'; - beg = out.Buf(); - val = qryBeforeEscapedFragment; + if (field == FieldQuery) { + if (queryEscapedFragment.IsInited()) { + out << '!'; + if (!queryEscapedFragment.empty()) { + ReEncodeToField( + out, queryEscapedFragment, + FieldQuery, FeatureDecodeANY | careFlags, + FieldFrag, FeatureDecodeANY | parser.GetFieldFlags(FieldFrag) + ); } - break; - - case FieldFrag: - if (encHashBangFrag) { - const EField dstfld = FieldQuery; // that's where we will store - const TSection& qry = parser.Get(dstfld); - if (qry.IsSet()) - if (AppendField(out, dstfld, qry.Get(), qry.GetFlagsEncode())) - out << '&'; - out << 
escFragPrefix; - val.Skip(1); // skip '!' - ReEncodeToField(out, val, fld, careFlags, dstfld, parser.GetFieldFlags(dstfld)); - FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf())); + FldSetNoDirty(FieldFrag, TStringBuf(beg, out.Buf())); + if (queryBeforeEscapedFragment.empty()) { continue; } - break; + out << '\0'; + beg = out.Buf(); + value = queryBeforeEscapedFragment; + } + } else if (field == FieldFrag) { + if (encodeHashBang) { + const TSection& query = parser.Get(FieldQuery); + if (query.IsSet() && AppendField(out, FieldQuery, query.Get(), query.GetFlagsEncode())) { + out << '&'; + } + out << escapedFragment; + value.Skip(1); // skip '!' + ReEncodeToField( + out, value, + FieldFrag, careFlags, + FieldQuery, parser.GetFieldFlags(FieldQuery) + ); + FldSetNoDirty(FieldQuery, TStringBuf(beg, out.Buf())); + continue; + } } - AppendField(out, fld, val, careFlags); + AppendField(out, field, value, careFlags); char* end = out.Buf(); if (careFlags & FeaturePathOperation) { - if (!PathOperation(beg, end, PathOperationFlag(parser.Flags))) + if (!PathOperation(beg, end, PathOperationFlag(parser.Flags))) { return ParsedBadPath; - + } Y_ASSERT(beg >= out.Beg()); out.SetPos(end); } - - FldSetNoDirty(fld, TStringBuf(beg, end)); + FldSetNoDirty(field, TStringBuf(beg, end)); // special character case const long checkChars = section.GetFlagsAllPlaintext() & FeaturesCheckSpecialChar; if (0 != checkChars) { // has unencoded special chars: check permission - const long allowChars = parser.GetFieldFlags(fld) & checkChars; - if (checkChars != allowChars) - ret = ParsedBadFormat; + const long allowChars = parser.GetFieldFlags(field) & checkChars; + if (checkChars != allowChars) { + status = ParsedBadFormat; + } } - out << '\0'; } - if (hostConverted) { + if (inHostNonAsciiChars) { char* beg = out.Buf(); - out << hostascii; - char* end = out.Buf(); - const EField fld = convertIDN ? 
FieldHost : FieldHostAscii; - FldSetNoDirty(fld, TStringBuf(beg, end)); + out << hostAsciiBuf; + const EField field = convertIDN ? FieldHost : FieldHostAscii; + FldSetNoDirty(field, TStringBuf(beg, out.Buf())); out << '\0'; } Buffer.Resize(out.Len()); - if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) { - if (SchemeUnknown == defscheme) - ret = ParsedBadScheme; - else - SetSchemeImpl(defscheme); + if (GetScheme() == SchemeEmpty && SchemeEmpty != defaultScheme) { + if (SchemeUnknown == defaultScheme) { + status = ParsedBadScheme; + } else { + SetSchemeImpl(defaultScheme); + } } - - if (0 == (parser.Flags & FeatureAllowEmptyPath)) + if (0 == (parser.Flags & FeatureAllowEmptyPath)) { CheckMissingFields(); + } const TStringBuf& port = GetField(FieldPort); - if (!port.empty()) { - if (!TryFromString<ui16>(port, Port)) - ret = ParsedBadPort; + if (!port.empty() && !TryFromString<ui16>(port, Port)) { + status = ParsedBadPort; + } + if (ParsedOK != status) { + return status; } - - if (ParsedOK != ret) - return ret; - // run validity checks now that all fields are set // check the host for DNS compliance - do { - if (0 == (flags & FeatureCheckHost)) - break; - if (hostascii.empty()) - hostascii = GetField(FieldHost); - if (hostascii.empty()) - break; - // IP literal - if ('[' == hostascii[0] && ']' == hostascii.back()) - break; - ret = CheckHost(hostascii); - if (ParsedOK != ret) - return ret; - } while (false); - - return ret; + if (0 != (flags & FeatureCheckHost)) { + if (hostAsciiBuf.empty()) { + hostAsciiBuf = GetField(FieldHost); + } + if (!hostAsciiBuf.empty()) { + // IP literal + if ('[' != hostAsciiBuf[0] || ']' != hostAsciiBuf.back()) { + status = CheckHost(hostAsciiBuf); + } + } + } + return status; } - TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { + TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind 
defaultScheme, ECharset enc) { Clear(); - if (url.empty()) + if (url.empty()) { return ParsedEmpty; - - if (maxlen > 0 && url.length() > maxlen) + } + if (maxlen > 0 && url.length() > maxlen) { return ParsedTooLong; - + } const TParser parser(flags, url, enc); - return AssignImpl(parser, defscheme); + return AssignImpl(parser, defaultScheme); } TState::EParsed TUri::Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& url_base, ui32 maxlen, ECharset enc) { - const TParseFlags flags1 = flags.Exclude(FeatureNoRelPath); - TState::EParsed ret = ParseImpl(url, url_base.empty() ? flags : flags1, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; + const TParseFlags parseFlags = url_base.empty() ? flags : flags.Exclude(FeatureNoRelPath); + TState::EParsed status = ParseImpl(url, parseFlags, maxlen, SchemeEmpty, enc); + if (ParsedOK != status) { + return status; + } if (!url_base.empty() && !IsValidAbs()) { TUri base; - ret = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; + status = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc); + if (ParsedOK != status) { + return status; + } Merge(base, PathOperationFlag(flags)); } - Rewrite(); - return ret; + return status; } TState::EParsed TUri::Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags, ui32 maxlen, ECharset enc) { - const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); - if (ParsedOK != ret) - return ret; - - if (!IsValidAbs()) + const TState::EParsed status = ParseImpl(url, flags, maxlen, SchemeEmpty, enc); + if (ParsedOK != status) { + return status; + } + if (!IsValidAbs()) { Merge(base, PathOperationFlag(flags)); - + } Rewrite(); - return ret; + return status; } - TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) { - const TState::EParsed ret = ParseImpl( - url, flags | FeatureNoRelPath, maxlen, 
defscheme, enc); - if (ParsedOK != ret) - return ret; + TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defaultScheme, ECharset enc) { + const TState::EParsed status = ParseImpl(url, flags | FeatureNoRelPath, maxlen, defaultScheme, enc); - if (IsNull(FlagHost)) + if (ParsedOK != status) { + return status; + } + if (IsNull(FlagHost)) { return ParsedBadHost; - + } Rewrite(); return ParsedOK; } |