about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/uri
diff options
context:
space:
mode:
author socialgraph <socialgraph@yandex-team.com> 2022-08-03 13:01:16 +0300
committer socialgraph <socialgraph@yandex-team.com> 2022-08-03 13:01:16 +0300
commit 15c8b667f31654db48aa974062cc70aade3e25e3 (patch)
tree 53a3ac13e2e684fc5d5eb8fbffcd4586613cc496 /library/cpp/uri
parent cb684f6475d0ce37cbb8ba4123d191fd5e877161 (diff)
download ydb-15c8b667f31654db48aa974062cc70aade3e25e3.tar.gz
refactor assign
Чтобы было проще работать с кодом для задачи, провел рефакторинг кода: - избавился от while(false); - избавился от излишних сокращений в названиях локальных переменных; - добавил фигурные скобки.
Diffstat (limited to 'library/cpp/uri')
-rw-r--r-- library/cpp/uri/assign.cpp 427
1 files changed, 220 insertions, 207 deletions
diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp
index 224b0ff6ef..97122c445a 100644
--- a/library/cpp/uri/assign.cpp
+++ b/library/cpp/uri/assign.cpp
@@ -16,8 +16,9 @@ namespace NUri {
// proper stringprep and splitting on dot-equivalent characters
char* buf;
static_assert(sizeof(*idna) == sizeof(ui32), "fixme");
- if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*)idna, &buf, 0))
+ if (IDNA_SUCCESS != idna_to_ascii_4z((const uint32_t*) idna, &buf, 0)) {
buf = nullptr;
+ }
return buf;
}
@@ -32,13 +33,13 @@ namespace NUri {
}
TStringBuf TUri::HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc) {
- TStringBuf outhost; // store the result here before returning it, to get RVO
+ TStringBuf outHost; // store the result here before returning it, to get RVO
size_t buflen = 0;
- if (hasExtended && !allowIDN)
- return outhost; // definitely can't convert
-
+ if (hasExtended && !allowIDN) {
+ return outHost; // definitely can't convert
+ }
// charset-recode: RFC 3986, 3.2.2, requires percent-encoded non-ASCII
// chars in reg-name to be UTF-8 so convert to UTF-8 prior to decoding
const bool recoding = CODES_UTF8 != enc && hasExtended;
@@ -46,8 +47,9 @@ namespace NUri {
size_t nrd, nwr;
buflen = host.length() * 4;
buf.Reset(static_cast<char*>(y_allocate(buflen)));
- if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr))
- return outhost;
+ if (RECODE_OK != Recode(enc, CODES_UTF8, host.data(), buf.Get(), host.length(), buflen, nrd, nwr)) {
+ return outHost;
+ }
host = TStringBuf(buf.Get(), nwr);
}
@@ -63,15 +65,16 @@ namespace NUri {
hasExtended = 0 != (outFlags & FeatureEncodeExtendedASCII);
// check again
- if (hasExtended && !allowIDN)
- return outhost;
+ if (hasExtended && !allowIDN) {
+ return outHost;
+ }
host = out.Str();
// convert to punycode if needed
if (!hasExtended) {
- outhost = host;
- return outhost;
+ outHost = host;
+ return outHost;
}
TMallocPtr<char> puny;
@@ -82,342 +85,352 @@ namespace NUri {
if (!puny) {
// XXX: try user charset unless UTF8 or converted to it
- if (CODES_UTF8 == enc || recoding)
- return outhost;
+ if (CODES_UTF8 == enc || recoding) {
+ return outHost;
+ }
try {
puny = IDNToAscii(host, enc);
} catch (const yexception& /* exc */) {
- return outhost;
+ return outHost;
+ }
+ if (!puny) {
+ return outHost;
}
- if (!puny)
- return outhost;
}
buf = puny;
- outhost = buf.Get();
+ outHost = buf.Get();
- return outhost;
+ return outHost;
}
TStringBuf TUri::HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc) {
// find what we have
long haveFlags = 0;
- for (size_t i = 0; i != host.length(); ++i)
+ for (size_t i = 0; i != host.length(); ++i) {
haveFlags |= TEncoder::GetFlags(host[i]).FeatFlags;
-
+ }
// interested in encoded characters or (if IDN is allowed) extended ascii
- TStringBuf outhost;
+ TStringBuf outHost;
const bool haveExtended = haveFlags & FeatureEncodeExtendedASCII;
if (!haveExtended || allowIDN) {
- if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY))
- outhost = host;
- else
- outhost = HostToAscii(host, buf, haveExtended, allowIDN, enc);
+ if (!haveExtended && 0 == (haveFlags & FeatureDecodeANY)) {
+ outHost = host;
+ } else {
+ outHost = HostToAscii(host, buf, haveExtended, allowIDN, enc);
+ }
}
-
- return outhost;
+ return outHost;
}
- static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField fld, const TStringBuf& val, long flags) {
- if (val.empty())
+ static inline bool AppendField(TMemoryWriteBuffer& out, TField::EField field, const TStringBuf& value, long flags) {
+ if (value.empty()) {
return false;
- if (flags & TFeature::FeaturesAllEncoder)
- TUri::ReEncodeField(out, val, fld, flags);
- else
- out << val;
+ }
+ if (flags & TFeature::FeaturesAllEncoder) {
+ TUri::ReEncodeField(out, value, field, flags);
+ } else {
+ out << value;
+ }
return true;
}
- TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defscheme) {
+ TState::EParsed TUri::AssignImpl(const TParser& parser, TScheme::EKind defaultScheme) {
Clear();
- TState::EParsed ret = parser.State;
- if (ParsedBadFormat <= ret)
- return ret;
-
+ TState::EParsed status = parser.State;
+ if (ParsedBadFormat <= status) {
+ return status;
+ }
const TSection& scheme = parser.Get(FieldScheme);
const TSchemeInfo& schemeInfo = SetSchemeImpl(parser.Scheme);
// set the scheme always if available
- if (schemeInfo.Str.empty() && scheme.IsSet())
+ if (schemeInfo.Str.empty() && scheme.IsSet()) {
FldSet(FieldScheme, scheme.Get());
-
- if (ParsedOK != ret)
- return ret;
-
+ }
+ if (ParsedOK != status) {
+ return status;
+ }
size_t buflen = 0;
// special processing for fields
const bool convertIDN = parser.Flags & FeatureConvertHostIDN;
long flags = parser.Flags.Allow;
- if (convertIDN)
+ if (convertIDN) {
flags |= FeatureAllowHostIDN | FeatureCheckHost;
+ }
// process non-ASCII host for punycode
- TMallocPtr<char> hostptr;
- TStringBuf hostascii; // empty: use host field; non-empty: ascii
- bool hostConverted = false; // hostascii is empty or the original
+ TMallocPtr<char> hostPtr;
+ TStringBuf hostAsciiBuf;
+ bool inHostNonAsciiChars = false;
+
const TSection& host = parser.Get(FieldHost);
if (host.IsSet() && !FldIsSet(FieldHost)) {
const bool allowIDN = (flags & FeatureAllowHostIDN);
- const TStringBuf hostbuf = host.Get();
+ const TStringBuf hostBuf = host.Get();
// if we know we have and allow extended-ASCII chars, no need to check further
- if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII))
- hostascii = HostToAscii(hostbuf, hostptr, true, true, parser.Enc);
- else
- hostascii = HostToAscii(hostbuf, hostptr, allowIDN, parser.Enc);
-
- if (hostascii.empty())
- ret = ParsedBadHost; // exists but cannot be converted
- else if (hostbuf.data() != hostascii.data()) {
- hostConverted = true;
- buflen += 1 + hostascii.length();
- if (convertIDN)
+ if (allowIDN && (host.GetFlagsAllPlaintext() & FeatureEncodeExtendedASCII)) {
+ hostAsciiBuf = HostToAscii(hostBuf, hostPtr, true, true, parser.Enc);
+ } else {
+ hostAsciiBuf = HostToAscii(hostBuf, hostPtr, allowIDN, parser.Enc);
+ }
+
+ if (hostAsciiBuf.empty()) {
+ status = ParsedBadHost; // exists but cannot be converted
+
+ } else if (hostBuf.data() != hostAsciiBuf.data()) {
+ inHostNonAsciiChars = true;
+
+ buflen += 1 + hostAsciiBuf.length();
+ if (convertIDN) {
FldMarkSet(FieldHost); // so that we don't process host below
+ }
}
}
// add unprocessed fields
- for (int idx = 0; idx < FieldUrlMAX; ++idx) {
- const EField fld = EField(idx);
- const TSection& section = parser.Get(fld);
- if (section.IsSet() && !FldIsSet(fld))
+ for (int i = 0; i < FieldUrlMAX; ++i) {
+ const EField field = EField(i);
+ const TSection& section = parser.Get(field);
+
+ if (section.IsSet() && !FldIsSet(field)) {
buflen += 1 + section.EncodedLen(); // includes null
+ }
+ }
+ if (0 == buflen) { // no more sections set?
+ return status;
}
- if (0 == buflen) // no more sections set?
- return ret;
// process #! fragments
// https://developers.google.com/webmasters/ajax-crawling/docs/specification
- static const TStringBuf escFragPrefix(TStringBuf("_escaped_fragment_="));
+ static const TStringBuf escapedFragment(TStringBuf("_escaped_fragment_="));
- bool encHashBangFrag = false;
- TStringBuf qryBeforeEscapedFragment;
- TStringBuf qryEscapedFragment;
- do {
- if (FldIsSet(FieldFrag) || FldIsSet(FieldQuery))
- break;
+ bool encodeHashBang = false;
+ TStringBuf queryBeforeEscapedFragment;
+ TStringBuf queryEscapedFragment;
+ if (!FldIsSet(FieldFrag) && !FldIsSet(FieldQuery)) {
const TSection& frag = parser.Get(FieldFrag);
+
if (frag.IsSet()) {
- if (0 == (parser.Flags & FeatureHashBangToEscapedFragment))
- break;
- const TStringBuf fragbuf = frag.Get();
- if (fragbuf.empty() || '!' != fragbuf[0])
- break;
- encHashBangFrag = true;
- // '!' will make space for '&' or '\0' if needed
- buflen += escFragPrefix.length();
- buflen += 2 * fragbuf.length(); // we don't know how many will be encoded
+ if (0 != (parser.Flags & FeatureHashBangToEscapedFragment)) {
+ const TStringBuf fragBuf = frag.Get();
+ if (!fragBuf.empty() && '!' == fragBuf[0]) {
+ encodeHashBang = true;
+ // '!' will make space for '&' or '\0' if needed
+ buflen += escapedFragment.length();
+ buflen += 2 * fragBuf.length(); // we don't know how many will be encoded
+ }
+ }
} else {
- const TSection& qry = parser.Get(FieldQuery);
- if (!qry.IsSet())
- break;
- // FeatureHashBangToEscapedFragment has preference
- if (FeatureEscapedToHashBangFragment != (parser.Flags & FeaturesEscapedFragment))
- break;
- qry.Get().RSplit('&', qryBeforeEscapedFragment, qryEscapedFragment);
- if (!qryEscapedFragment.StartsWith(escFragPrefix)) {
- qryEscapedFragment.Clear();
- break;
+ const TSection& query = parser.Get(FieldQuery);
+ if (query.IsSet()) {
+ // FeatureHashBangToEscapedFragment has preference
+ if (FeatureEscapedToHashBangFragment == (parser.Flags & FeaturesEscapedFragment)) {
+ const TStringBuf queryBuf = query.Get();
+
+ queryBuf.RSplit('&', queryBeforeEscapedFragment, queryEscapedFragment);
+ if (queryEscapedFragment.StartsWith(escapedFragment)) {
+ queryEscapedFragment.Skip(escapedFragment.length());
+ buflen += 2; // for '!' and '\0' in fragment
+ buflen -= escapedFragment.length();
+ } else {
+ queryEscapedFragment.Clear();
+ }
+ }
}
- qryEscapedFragment.Skip(escFragPrefix.length());
- buflen += 2; // for '!' and '\0' in fragment
- buflen -= escFragPrefix.length();
}
- } while (false);
+ }
// now set all fields prior to validating
Alloc(buflen);
TMemoryWriteBuffer out(Buffer.data(), Buffer.size());
- for (int idx = 0; idx < FieldUrlMAX; ++idx) {
- const EField fld = EField(idx);
+ for (int i = 0; i < FieldUrlMAX; ++i) {
+ const EField field = EField(i);
- const TSection& section = parser.Get(fld);
- if (!section.IsSet() || FldIsSet(fld))
+ const TSection& section = parser.Get(field);
+ if (!section.IsSet() || FldIsSet(field)) {
continue;
-
- if (FieldQuery == fld && encHashBangFrag)
+ }
+ if (FieldQuery == field && encodeHashBang) {
continue;
-
- if (FieldFrag == fld && qryEscapedFragment.IsInited())
+ }
+ if (FieldFrag == field && queryEscapedFragment.IsInited()) {
continue;
+ }
char* beg = out.Buf();
- TStringBuf val = section.Get();
+ TStringBuf value = section.Get();
long careFlags = section.GetFlagsEncode();
- switch (fld) {
- default:
- break;
-
- case FieldQuery:
- if (qryEscapedFragment.IsInited()) {
- const EField dstfld = FieldFrag; // that's where we will store
- out << '!';
- if (!qryEscapedFragment.empty())
- ReEncodeToField(out, qryEscapedFragment, fld, FeatureDecodeANY | careFlags, dstfld, FeatureDecodeANY | parser.GetFieldFlags(dstfld));
- FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf()));
- if (qryBeforeEscapedFragment.empty())
- continue;
- out << '\0';
- beg = out.Buf();
- val = qryBeforeEscapedFragment;
+ if (field == FieldQuery) {
+ if (queryEscapedFragment.IsInited()) {
+ out << '!';
+ if (!queryEscapedFragment.empty()) {
+ ReEncodeToField(
+ out, queryEscapedFragment,
+ FieldQuery, FeatureDecodeANY | careFlags,
+ FieldFrag, FeatureDecodeANY | parser.GetFieldFlags(FieldFrag)
+ );
}
- break;
-
- case FieldFrag:
- if (encHashBangFrag) {
- const EField dstfld = FieldQuery; // that's where we will store
- const TSection& qry = parser.Get(dstfld);
- if (qry.IsSet())
- if (AppendField(out, dstfld, qry.Get(), qry.GetFlagsEncode()))
- out << '&';
- out << escFragPrefix;
- val.Skip(1); // skip '!'
- ReEncodeToField(out, val, fld, careFlags, dstfld, parser.GetFieldFlags(dstfld));
- FldSetNoDirty(dstfld, TStringBuf(beg, out.Buf()));
+ FldSetNoDirty(FieldFrag, TStringBuf(beg, out.Buf()));
+ if (queryBeforeEscapedFragment.empty()) {
continue;
}
- break;
+ out << '\0';
+ beg = out.Buf();
+ value = queryBeforeEscapedFragment;
+ }
+ } else if (field == FieldFrag) {
+ if (encodeHashBang) {
+ const TSection& query = parser.Get(FieldQuery);
+ if (query.IsSet() && AppendField(out, FieldQuery, query.Get(), query.GetFlagsEncode())) {
+ out << '&';
+ }
+ out << escapedFragment;
+ value.Skip(1); // skip '!'
+ ReEncodeToField(
+ out, value,
+ FieldFrag, careFlags,
+ FieldQuery, parser.GetFieldFlags(FieldQuery)
+ );
+ FldSetNoDirty(FieldQuery, TStringBuf(beg, out.Buf()));
+ continue;
+ }
}
- AppendField(out, fld, val, careFlags);
+ AppendField(out, field, value, careFlags);
char* end = out.Buf();
if (careFlags & FeaturePathOperation) {
- if (!PathOperation(beg, end, PathOperationFlag(parser.Flags)))
+ if (!PathOperation(beg, end, PathOperationFlag(parser.Flags))) {
return ParsedBadPath;
-
+ }
Y_ASSERT(beg >= out.Beg());
out.SetPos(end);
}
-
- FldSetNoDirty(fld, TStringBuf(beg, end));
+ FldSetNoDirty(field, TStringBuf(beg, end));
// special character case
const long checkChars = section.GetFlagsAllPlaintext() & FeaturesCheckSpecialChar;
if (0 != checkChars) { // has unencoded special chars: check permission
- const long allowChars = parser.GetFieldFlags(fld) & checkChars;
- if (checkChars != allowChars)
- ret = ParsedBadFormat;
+ const long allowChars = parser.GetFieldFlags(field) & checkChars;
+ if (checkChars != allowChars) {
+ status = ParsedBadFormat;
+ }
}
-
out << '\0';
}
- if (hostConverted) {
+ if (inHostNonAsciiChars) {
char* beg = out.Buf();
- out << hostascii;
- char* end = out.Buf();
- const EField fld = convertIDN ? FieldHost : FieldHostAscii;
- FldSetNoDirty(fld, TStringBuf(beg, end));
+ out << hostAsciiBuf;
+ const EField field = convertIDN ? FieldHost : FieldHostAscii;
+ FldSetNoDirty(field, TStringBuf(beg, out.Buf()));
out << '\0';
}
Buffer.Resize(out.Len());
- if (GetScheme() == SchemeEmpty && SchemeEmpty != defscheme) {
- if (SchemeUnknown == defscheme)
- ret = ParsedBadScheme;
- else
- SetSchemeImpl(defscheme);
+ if (GetScheme() == SchemeEmpty && SchemeEmpty != defaultScheme) {
+ if (SchemeUnknown == defaultScheme) {
+ status = ParsedBadScheme;
+ } else {
+ SetSchemeImpl(defaultScheme);
+ }
}
-
- if (0 == (parser.Flags & FeatureAllowEmptyPath))
+ if (0 == (parser.Flags & FeatureAllowEmptyPath)) {
CheckMissingFields();
+ }
const TStringBuf& port = GetField(FieldPort);
- if (!port.empty()) {
- if (!TryFromString<ui16>(port, Port))
- ret = ParsedBadPort;
+ if (!port.empty() && !TryFromString<ui16>(port, Port)) {
+ status = ParsedBadPort;
+ }
+ if (ParsedOK != status) {
+ return status;
}
-
- if (ParsedOK != ret)
- return ret;
-
// run validity checks now that all fields are set
// check the host for DNS compliance
- do {
- if (0 == (flags & FeatureCheckHost))
- break;
- if (hostascii.empty())
- hostascii = GetField(FieldHost);
- if (hostascii.empty())
- break;
- // IP literal
- if ('[' == hostascii[0] && ']' == hostascii.back())
- break;
- ret = CheckHost(hostascii);
- if (ParsedOK != ret)
- return ret;
- } while (false);
-
- return ret;
+ if (0 != (flags & FeatureCheckHost)) {
+ if (hostAsciiBuf.empty()) {
+ hostAsciiBuf = GetField(FieldHost);
+ }
+ if (!hostAsciiBuf.empty()) {
+ // IP literal
+ if ('[' != hostAsciiBuf[0] || ']' != hostAsciiBuf.back()) {
+ status = CheckHost(hostAsciiBuf);
+ }
+ }
+ }
+ return status;
}
- TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) {
+ TState::EParsed TUri::ParseImpl(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defaultScheme, ECharset enc) {
Clear();
- if (url.empty())
+ if (url.empty()) {
return ParsedEmpty;
-
- if (maxlen > 0 && url.length() > maxlen)
+ }
+ if (maxlen > 0 && url.length() > maxlen) {
return ParsedTooLong;
-
+ }
const TParser parser(flags, url, enc);
- return AssignImpl(parser, defscheme);
+ return AssignImpl(parser, defaultScheme);
}
TState::EParsed TUri::Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& url_base, ui32 maxlen, ECharset enc) {
- const TParseFlags flags1 = flags.Exclude(FeatureNoRelPath);
- TState::EParsed ret = ParseImpl(url, url_base.empty() ? flags : flags1, maxlen, SchemeEmpty, enc);
- if (ParsedOK != ret)
- return ret;
+ const TParseFlags parseFlags = url_base.empty() ? flags : flags.Exclude(FeatureNoRelPath);
+ TState::EParsed status = ParseImpl(url, parseFlags, maxlen, SchemeEmpty, enc);
+ if (ParsedOK != status) {
+ return status;
+ }
if (!url_base.empty() && !IsValidAbs()) {
TUri base;
- ret = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc);
- if (ParsedOK != ret)
- return ret;
+ status = base.ParseImpl(url_base, flags, maxlen, SchemeEmpty, enc);
+ if (ParsedOK != status) {
+ return status;
+ }
Merge(base, PathOperationFlag(flags));
}
-
Rewrite();
- return ret;
+ return status;
}
TState::EParsed TUri::Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags, ui32 maxlen, ECharset enc) {
- const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc);
- if (ParsedOK != ret)
- return ret;
-
- if (!IsValidAbs())
+ const TState::EParsed status = ParseImpl(url, flags, maxlen, SchemeEmpty, enc);
+ if (ParsedOK != status) {
+ return status;
+ }
+ if (!IsValidAbs()) {
Merge(base, PathOperationFlag(flags));
-
+ }
Rewrite();
- return ret;
+ return status;
}
- TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defscheme, ECharset enc) {
- const TState::EParsed ret = ParseImpl(
- url, flags | FeatureNoRelPath, maxlen, defscheme, enc);
- if (ParsedOK != ret)
- return ret;
+ TState::EParsed TUri::ParseAbsUri(const TStringBuf& url, const TParseFlags& flags, ui32 maxlen, TScheme::EKind defaultScheme, ECharset enc) {
+ const TState::EParsed status = ParseImpl(url, flags | FeatureNoRelPath, maxlen, defaultScheme, enc);
- if (IsNull(FlagHost))
+ if (ParsedOK != status) {
+ return status;
+ }
+ if (IsNull(FlagHost)) {
return ParsedBadHost;
-
+ }
Rewrite();
return ParsedOK;
}