aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/uri/uri.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/uri/uri.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/uri/uri.cpp')
-rw-r--r--library/cpp/uri/uri.cpp623
1 files changed, 623 insertions, 0 deletions
diff --git a/library/cpp/uri/uri.cpp b/library/cpp/uri/uri.cpp
new file mode 100644
index 0000000000..1664e8c8dd
--- /dev/null
+++ b/library/cpp/uri/uri.cpp
@@ -0,0 +1,623 @@
+#include "uri.h"
+#include "parse.h"
+
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/system/maxlen.h>
+#include <util/system/yassert.h>
+#include <util/generic/map.h>
+
+namespace NUri {
+ // Checks the syntax of a host name.
+ //
+ // An empty host is accepted. Otherwise the host is read as '.'-separated
+ // labels:
+ //  - a label has to begin with an alphanumeric character or '_';
+ //  - inside a label only alphanumerics, '-' and '_' are allowed;
+ //  - the character in front of every '.' has to be alphanumeric;
+ //  - if some label contains '_', at least two more labels have to follow
+ //    the last such label.
+ //
+ // Returns ParsedOK when all rules hold and ParsedBadHost otherwise.
+ TState::EParsed TUri::CheckHost(const TStringBuf& host) {
+     unsigned labelCount = 0;          // number of labels started so far
+     unsigned lastUnderscoreLabel = 0; // 1-based index of the last label holding '_', 0 if none
+
+     bool prevIsAlnum = false; // class of the most recent non-dot character
+     bool atLabelStart = true; // true right at the beginning and after each '.'
+
+     // An empty host simply skips the loop and ends up as ParsedOK.
+     const char* const last = host.data() + host.length();
+     for (const char* it = host.data(); it != last; ++it) {
+         const char ch = *it;
+
+         if (ch == '.') {
+             // rejects a leading dot, "..", and labels ending in '-' or '_'
+             if (atLabelStart || !prevIsAlnum)
+                 return ParsedBadHost;
+             atLabelStart = true;
+             continue;
+         }
+
+         prevIsAlnum = 0 != isalnum(static_cast<unsigned char>(ch));
+         const bool isUnderscore = ('_' == ch);
+
+         if (atLabelStart) {
+             if (!prevIsAlnum && !isUnderscore)
+                 return ParsedBadHost;
+             atLabelStart = false;
+             ++labelCount;
+         } else if (!prevIsAlnum && ch != '-' && !isUnderscore) {
+             return ParsedBadHost;
+         }
+
+         // '_' is a non-standard character tolerated for certain hosts;
+         // remember the label it was seen in
+         if (isUnderscore)
+             lastUnderscoreLabel = labelCount;
+     }
+
+     const bool underscoreTooLate =
+         lastUnderscoreLabel > 0 && labelCount < lastUnderscoreLabel + 2;
+     return underscoreTooLate ? ParsedBadHost : ParsedOK;
+ }
+
+ /********************************************************/
+ // Builds a URI out of components that have already been split apart.
+ //
+ // scheme      - when non-empty it is looked up with TSchemeInfo::Get() and
+ //               applied through SetSchemeImpl(); the given text is stored as
+ //               the scheme field only if the resulting scheme info has an
+ //               empty Str.
+ // defaultPort - when non-zero it replaces DefaultPort after the scheme has
+ //               been applied (the value is narrowed to ui16).
+ // port        - when non-zero its decimal text becomes the port field.
+ // host, path, query - handed to FldTrySet() for their respective fields.
+ //
+ // Rewrite() is called at the end; the port text lives in a local array, so
+ // the field must not be left pointing at it.
+ TUri::TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query, const TStringBuf& scheme, unsigned defaultPort)
+     : FieldsSet(0)
+     , Port(port)
+     , DefaultPort(0)
+     , Scheme(SchemeEmpty)
+     , FieldsDirty(0)
+ {
+     const bool haveScheme = !scheme.empty();
+     if (haveScheme && SetSchemeImpl(TSchemeInfo::Get(scheme)).Str.empty())
+         FldSet(FieldScheme, scheme);
+
+     // an explicit default port overrides whatever the scheme implied
+     if (defaultPort != 0)
+         DefaultPort = static_cast<ui16>(defaultPort);
+
+     char portText[6]; // a ui16 needs at most 5 digits
+     if (port != 0) {
+         const size_t digits = ToString(port, portText, sizeof(portText));
+         FldSet(FieldPort, TStringBuf(portText, digits));
+     }
+
+     FldTrySet(FieldHost, host);
+     FldTrySet(FieldPath, path);
+     FldTrySet(FieldQuery, query);
+
+     Rewrite();
+ }
+
+ /********************************************************/
+ // Assigns value to field, updating the derived scheme / port state first.
+ //
+ // field    - field to assign; an invalid field makes the call return false.
+ // value    - new contents; an uninitialised value clears the field.
+ // strconst - value is a string constant: the field references it directly
+ //            and is marked clean.
+ // nocopy   - the field should reference value directly via FldSet().
+ //
+ // For FieldScheme the scheme is looked up and applied first; if the
+ // resulting scheme info has a non-empty Str nothing else is done. For
+ // FieldPort the numeric Port is updated (0 for empty text, otherwise parsed
+ // with FromString<ui16>).
+ //
+ // Returns true after a nocopy assignment, the result of FldTryCpy() for an
+ // ordinary assignment, and false in all other cases.
+ // NOTE(review): true presumably means "the buffer needs a rewrite" --
+ // confirm against the callers.
+ bool TUri::FldSetImpl(
+     EField field, TStringBuf value, bool strconst, bool nocopy) {
+     if (!FldIsValid(field))
+         return false;
+
+     if (FieldScheme == field) {
+         if (!SetScheme(TSchemeInfo::Get(value)).Str.empty())
+             return false;
+     } else if (FieldPort == field) {
+         Port = value.empty() ? 0 : FromString<ui16>(value);
+     }
+
+     if (!value.IsInited()) {
+         FldClr(field);
+         return false;
+     }
+
+     if (strconst) {
+         // string constants outlive the object, no need to keep a copy
+         FldMarkClean(field);
+         FldSetNoDirty(field, value);
+         return false;
+     }
+
+     if (!nocopy)
+         return FldTryCpy(field, value);
+
+     FldSet(field, value);
+     return true;
+ }
+
+ /********************************************************/
+ // Stores value in field, reusing the field's current storage when possible.
+ //
+ // If the field is clean, already set, at least as long as value, and its
+ // text lies inside this object's buffer (IsInBuffer), value is written over
+ // the old text in place, a terminating NUL is put behind it, the field is
+ // truncated to the new length and false is returned; the field stays clean.
+ //
+ // In every other case the field is made to reference value's memory
+ // directly, is marked dirty unless it already was, and true is returned.
+ // NOTE(review): true presumably tells the caller that a buffer rewrite is
+ // pending -- confirm against the callers.
+ bool TUri::FldTryCpy(EField field, const TStringBuf& value) {
+     if (!FldIsDirty(field)) {
+         do {
+             if (!FldIsSet(field))
+                 break;
+
+             TStringBuf& fld = Fields[field];
+             const size_t len = value.length();
+             if (fld.length() < len)
+                 break; // new text does not fit into the old slot
+
+             char* oldV = const_cast<char*>(fld.data());
+             if (!IsInBuffer(oldV))
+                 break; // field references foreign memory, must not write there
+
+             // value may alias the field's own storage (e.g. a sub-range of
+             // the current text), so the ranges can overlap: memmove, not
+             // memcpy. Skipped for empty input, whose data() may be null.
+             if (len != 0)
+                 memmove(oldV, value.data(), len);
+             // len <= old length, and RewriteImpl() reserves one extra byte
+             // behind every field, so this stays inside the field's slot
+             oldV[len] = 0;
+             fld.Trunc(len);
+             return false;
+         } while (false);
+
+         FldMarkDirty(field);
+     }
+
+     FldSetNoDirty(field, value);
+     return true;
+ }
+
+ /********************************************************/
+ // Repacks the text of every set field into a fresh buffer.
+ //
+ // Each set field is copied, followed by a NUL byte, into a newly sized
+ // TBuffer and re-pointed at its copy; the new buffer then replaces Buffer.
+ // If no field is set the buffer is just cleared. Afterwards
+ // CheckMissingFields() runs and all dirty flags are reset.
+ void TUri::RewriteImpl() {
+     // bytes required: text of each set field plus one terminator apiece
+     size_t total = 0;
+     for (int idx = 0; idx < FieldAllMAX; ++idx) {
+         const EField f = EField(idx);
+         if (!FldIsSet(f))
+             continue;
+         total += Fields[f].length() + 1;
+     }
+
+     if (0 == total) {
+         Buffer.Clear();
+     } else {
+         TBuffer fresh;
+         fresh.Resize(total);
+         TMemoryWriteBuffer writer(fresh.data(), fresh.size());
+
+         for (int idx = 0; idx < FieldAllMAX; ++idx) {
+             const EField f = EField(idx);
+             if (FldIsSet(f)) {
+                 const char* start = writer.Buf();
+                 const TStringBuf& src = Fields[f];
+                 writer << src;
+                 // the field now refers to the copy that was just written
+                 FldSetNoDirty(f, TStringBuf(start, src.length()));
+                 writer << '\0';
+             }
+         }
+
+         Buffer = std::move(fresh);
+     }
+
+     CheckMissingFields();
+
+     FieldsDirty = 0;
+ }
+
+ // Supplies the path "/" when a host is set, no path is set, and the
+ // current scheme lists the path among its required fields (FldReq).
+ // The path is set from a string constant and is not marked dirty.
+ void TUri::CheckMissingFields() {
+     const bool hostWithoutPath = FldSetCmp(FlagPath | FlagHost, FlagHost);
+     if (hostWithoutPath && (GetSchemeInfo().FldReq & FlagPath))
+         FldSetNoDirty(FieldPath, TStringBuf("/"));
+ }
+
+ /********************************************************
+  * Resolves this URI against base, filling in the parts this URI lacks.
+  *
+  * Nothing is changed when base.Scheme is SchemeUnknown, when base fails
+  * IsValidGlobal(), or when this URI has a scheme that differs
+  * (case-insensitively) from the scheme of base.
+  *
+  * Otherwise:
+  *  - a missing scheme is taken from base, together with Scheme and
+  *    DefaultPort;
+  *  - if this URI already has a host, merging stops there;
+  *  - host and port come from base; the query too when this URI had no
+  *    scheme, no query and no path; user/password when this URI has no
+  *    auth part and base has one;
+  *  - if the URI is then not yet IsValidAbs(): a missing path becomes the
+  *    base path (or "/"), any other path is appended to the base path and,
+  *    where needed, normalised by PathOperation(), to which correctAbs is
+  *    passed unchanged. If PathOperation() fails the URI is Clear()-ed.
+  *
+  * The buffer is rewritten when fields that were clean on entry ended up
+  * dirty, and always after a path was assembled in temporary storage.
+  ********************************************************/
+ void TUri::Merge(const TUri& base, int correctAbs) {
+ if (base.Scheme == SchemeUnknown)
+ return;
+
+ if (!base.IsValidGlobal())
+ return;
+
+ const TStringBuf& selfscheme = GetField(FieldScheme);
+ // basescheme is present since IsValidGlobal() succeeded
+ const TStringBuf& basescheme = base.GetField(FieldScheme);
+ const bool noscheme = !selfscheme.IsInited();
+ if (!noscheme && !EqualNoCase(selfscheme, basescheme)) // both have a scheme and they differ
+ return;
+
+ const ui32 cleanFields = ~FieldsDirty; // snapshot: fields that are clean before merging
+ do {
+ static constexpr TStringBuf rootPath = "/";
+
+ if (noscheme) {
+ if (!basescheme.empty()) {
+ FldSetNoDirty(FieldScheme, basescheme);
+ // check if it is canonical: when the text is not the scheme info's own
+ // string the field is marked dirty (presumably so that it gets copied
+ // into our buffer -- TODO confirm)
+ if (basescheme.data() != base.GetSchemeInfo().Str.data())
+ FldMarkDirty(FieldScheme);
+ }
+ Scheme = base.Scheme;
+ DefaultPort = base.DefaultPort;
+ }
+
+ if (!IsNull(FlagHost))
+ break; // this URI has its own host: nothing more to merge
+
+ FldTrySet(FieldHost, base);
+ FldChkSet(FieldPort, base);
+ Port = base.Port;
+
+ if (noscheme && IsNull(FlagQuery) && IsNull(FlagPath))
+ FldTrySet(FieldQuery, base);
+
+ if (IsNull(FlagAuth) && !base.IsNull(FlagAuth)) {
+ FldChkSet(FieldUser, base);
+ FldChkSet(FieldPass, base);
+ }
+
+ if (IsValidAbs()) // already a valid absolute URI, the path needs no resolving
+ break;
+
+ TStringBuf p0 = base.GetField(FieldPath); // base path, "/" if base has none
+ if (!p0.IsInited())
+ p0 = rootPath;
+
+ TStringBuf p1 = GetField(FieldPath); // our own path
+ if (!p1.IsInited()) {
+ if (p0.data() != rootPath.data()) // path text belongs to base
+ FldSet(FieldPath, p0);
+ else // the constant "/" can be referenced without marking the field dirty
+ FldSetNoDirty(FieldPath, rootPath);
+ break;
+ }
+ if (p1 && '/' == p1[0])
+ p1.Skip(1); // p0 will have one
+
+ bool pathop = true; // whether the joined path has to go through PathOperation()
+
+ TTempBufOutput out(p0.length() + p1.length() + 4); // +4 leaves room for "/../"
+ out << p0;
+ if ('/' != p0.back()) // NOTE(review): assumes p0 is not empty -- confirm a set-but-empty base path cannot reach here
+ out << "/../"; // base path ends in a file-like segment: step out of it
+ else if (p1.empty() || '.' != p1[0])
+ pathop = false; // plain concatenation is enough
+ out << p1;
+
+ char* beg = out.Data();
+ char* end = beg + out.Filled();
+ if (pathop && !PathOperation(beg, end, correctAbs)) {
+ Clear();
+ break;
+ }
+
+ // Needs immediate forced rewrite because of TTempBuf: the path field
+ // references the temporary buffer until RewriteImpl() copies it
+ FldSetNoDirty(FieldPath, TStringBuf(beg, end));
+ RewriteImpl();
+ } while (false);
+
+ CheckMissingFields();
+
+ // rewrite only if borrowed fields from base, i.e. a field that was clean
+ // on entry is dirty now
+ if (cleanFields & FieldsDirty)
+ RewriteImpl();
+ }
+
+ /********************************************************/
+ // Parses link into this object, resolves it against a base URL and
+ // classifies the result relative to base.
+ //
+ // base      - URL the link is compared with; also the merge base when
+ //             codebase is empty.
+ // link      - text of the link to parse.
+ // codebase  - optional alternative merge base; when non-empty it has to
+ //             parse into a valid absolute URL.
+ // careFlags, enc - passed through to ParseImpl().
+ //
+ // Returns
+ //   LinkIsBad      - link or codebase could not be parsed / codebase is not
+ //                    a valid absolute URL;
+ //   LinkBadAbs     - the merged result is not a valid absolute URL;
+ //   LinkIsGlobal   - the host is non-empty and port or host (compared
+ //                    case-insensitively) differ from those of base;
+ //   LinkIsFragment - path and query compare equal to those of base;
+ //   LinkIsLocal    - everything else.
+ TUri::TLinkType TUri::Normalize(const TUri& base,
+     const TStringBuf& link, const TStringBuf& codebase, long careFlags, ECharset enc) {
+     if (ParseImpl(link, careFlags, 0, SchemeEmpty, enc) != ParsedOK)
+         return LinkIsBad;
+
+     // obtained before merging, as in the original flow
+     const TStringBuf& host = GetHost();
+
+     if (codebase.empty()) {
+         // no explicit code base: resolve against the supplied base URL
+         Merge(base);
+     } else {
+         TUri codebaseUrl;
+         const bool parsed =
+             ParsedOK == codebaseUrl.ParseImpl(codebase, careFlags, 0, SchemeEmpty, enc);
+         if (!parsed || !codebaseUrl.IsValidAbs())
+             return LinkIsBad;
+         Merge(codebaseUrl);
+     }
+
+     // the outcome has to be a correct absolute URL
+     if (!IsValidAbs())
+         return LinkBadAbs;
+
+     // a different port or a different host means another server
+     const bool otherServer = !host.empty() &&
+         (GetPort() != base.GetPort() || !EqualNoCase(host, base.GetHost()));
+     if (otherServer)
+         return LinkIsGlobal;
+
+     // same path and query as base: the link points at the document itself
+     return 0 == Compare(base, FlagPath | FlagQuery) ? LinkIsFragment : LinkIsLocal;
+ }
+
+ /********************************************************/
+
+ // Estimates how many bytes printing the fields selected by flags may take.
+ //
+ // Only fields that are actually set are counted, and when the ASCII host
+ // is requested the regular host is left out. Starting from a fixed
+ // allowance of 10, every counted field adds its length plus one; user and
+ // password fields are counted at three times their length (presumably to
+ // cover percent-encoding on output -- TODO confirm).
+ size_t TUri::PrintSize(ui32 flags) const {
+     flags &= FieldsSet; // can't output what we don't have
+     if (flags & FlagHostAscii)
+         flags &= ~FlagHost; // never both host variants
+
+     size_t total = 10;
+     int idx = 0;
+     for (ui32 bit = 1; bit <= flags && idx < FieldAllMAX; bit <<= 1, ++idx) {
+         if (0 == (bit & flags))
+             continue;
+
+         const TStringBuf& text = Fields[idx];
+         if (!text.IsInited())
+             continue;
+
+         const size_t factor = (bit & FlagAuth) ? 3 : 1;
+         total += factor * text.length() + 1;
+     }
+
+     return total;
+ }
+
+ // Writes the parts of the URI selected by flags to out and returns out.
+ //
+ // Only fields that are set can be printed. Output order: "scheme:", then
+ // -- if a host is printed -- "//" (only when the scheme was requested),
+ // the encoded user, ":" plus encoded password, "@", the host and ":" in
+ // front of the port; then port, path, "?query" and "#fragment". The port
+ // is printed only when it is non-zero and differs from DefaultPort. With
+ // FlagHostAscii the ASCII form of the host is used. A set-but-empty path
+ // without a host is printed as ".".
+ IOutputStream& TUri::PrintImpl(IOutputStream& out, int flags) const {
+     const int requested = flags; // what the caller asked for
+     flags &= FieldsSet;          // what can actually be printed
+     if (flags & FlagHostAscii)
+         flags |= FlagHost; // lets the checks below test FlagHost only
+
+     if (flags & FlagScheme) {
+         const TStringBuf& scheme = Fields[FieldScheme];
+         if (!scheme.empty())
+             out << scheme << ':';
+     }
+
+     TStringBuf host;
+     if (flags & FlagHost)
+         host = Fields[(flags & FlagHostAscii) ? FieldHostAscii : FieldHost];
+
+     TStringBuf port;
+     const bool showPort = (flags & FlagPort) && 0 != Port && Port != DefaultPort;
+     if (showPort)
+         port = Fields[FieldPort];
+
+     if (host) {
+         if (requested & FlagScheme)
+             out << "//";
+
+         if (flags & FlagAuth) {
+             if (flags & FlagUser) {
+                 const TStringBuf& user = Fields[FieldUser];
+                 if (!user.empty())
+                     TEncoder::EncodeNotAlnum(out, user);
+             }
+
+             if (flags & FlagPass) {
+                 const TStringBuf& pass = Fields[FieldPass];
+                 if (pass.IsInited()) {
+                     out << ':';
+                     TEncoder::EncodeAll(out, pass);
+                 }
+             }
+
+             out << '@';
+         }
+
+         out << host;
+
+         if (port)
+             out << ':';
+     }
+     if (port)
+         out << port;
+
+     if (flags & FlagPath) {
+         TStringBuf path = Fields[FieldPath];
+         // for a relative URI an empty path is not the same as a missing one
+         if (path.empty() && !(flags & FlagHost))
+             path = TStringBuf(".");
+         out << path;
+     }
+
+     if (flags & FlagQuery) {
+         const TStringBuf& query = Fields[FieldQuery];
+         if (query.IsInited())
+             out << '?' << query;
+     }
+
+     if (flags & FlagFrag) {
+         const TStringBuf& frag = Fields[FieldFrag];
+         if (frag.IsInited())
+             out << '#' << frag;
+     }
+
+     return out;
+ }
+
+ /********************************************************/
+ // Three-way comparison of one field of this URI with the same field of
+ // url. Scheme and host are compared ignoring case, every other field
+ // exactly. Returns the result of the underlying comparison (0 when equal).
+ int TUri::CompareField(EField fld, const TUri& url) const {
+     const TStringBuf& mine = GetField(fld);
+     const TStringBuf& theirs = url.GetField(fld);
+
+     const bool ignoreCase = (FieldScheme == fld) || (FieldHost == fld);
+     if (ignoreCase)
+         return CompareNoCase(mine, theirs);
+     return mine.compare(theirs);
+ }
+
+ /********************************************************/
+ // Three-way comparison of this URI with url over the fields selected by
+ // flags.
+ //
+ // Order of checks: numeric ports (when FlagPort is selected), then the sets
+ // of selected fields that each side actually has, then the fields
+ // themselves in field order. Returns 0 when everything compares equal,
+ // otherwise the first non-zero difference.
+ int TUri::Compare(const TUri& url, int flags) const {
+     // the port is compared by value because it may be implied by a default
+     if (flags & FlagPort) {
+         const int portDiff = GetPort() - url.GetPort();
+         if (portDiff)
+             return portDiff;
+         flags &= ~FlagPort;
+     }
+
+     // which of the requested fields does each side have?
+     const int theirs = flags & url.FieldsSet;
+     flags &= FieldsSet;
+     const int setDiff = flags - theirs;
+     if (setDiff)
+         return setDiff;
+
+     // both sides have the same fields: compare their contents
+     for (int idx = 0; idx < FieldAllMAX; ++idx) {
+         const EField fld = EField(idx);
+         if (0 == (flags & FldFlag(fld)))
+             continue;
+         if (const int diff = CompareField(fld, url))
+             return diff;
+     }
+
+     return 0;
+ }
+
+ /********************************************************
+  * Normalises the path stored in [pathPtr, pathEnd) in place, resolving
+  * "." and ".." segments.
+  *
+  * The path is walked from its end towards its start, one '/'-separated
+  * segment at a time. Segments that are kept are written towards the end
+  * of the same memory, so the result ends at pathEnd and, on success,
+  * pathPtr is moved to where the result starts. pathEnd itself is moved
+  * back by one when the path ends in "/.".
+  *
+  * correctAbs decides what happens when an absolute path (one starting
+  * with '/') has more ".." than segments to remove:
+  *   > 0  the function fails;
+  *   == 0 the surplus ".." segments are kept in the result;
+  *   < 0  the surplus ".." segments are dropped.
+  * For a relative path surplus ".." segments are always kept in front.
+  *
+  * Returns false when pathPtr is null or the path cannot be resolved as
+  * described above, true otherwise (including for an empty path).
+  ********************************************************/
+ bool TUri::PathOperation(char*& pathPtr, char*& pathEnd, int correctAbs) {
+ if (!pathPtr)
+ return false;
+ if (pathPtr == pathEnd) // empty path: nothing to do
+ return true;
+
+ if ((pathEnd - pathPtr) >= 2 && *(pathEnd - 2) == '/' && *(pathEnd - 1) == '.') {
+ --pathEnd; // a trailing "/." is reduced to "/"
+ }
+
+ char* p_wr = pathEnd; // write cursor, moves towards pathPtr as segments are kept
+ int upCount = 0; // number of ".." segments still waiting to cancel a segment
+
+ char* p_prev = pathEnd;
+ Y_ASSERT(p_prev > pathPtr);
+ while (p_prev > pathPtr && *(p_prev - 1) == '/') // step back over trailing slashes
+ p_prev--;
+
+ for (char* p_rd = p_prev; p_rd; p_rd = p_prev) { // p_rd: end of the segment being examined
+ Y_ASSERT(p_rd == pathEnd || p_rd[0] == '/');
+ p_prev = nullptr; // stays null when no further '/' is found, which ends the loop
+
+ char* p = p_rd;
+
+ if (p > pathPtr) {
+ for (p--; *p != '/'; p--) { // scan back to the '/' in front of this segment
+ if (p == pathPtr)
+ break;
+ }
+ if (*p == '/') {
+ p_prev = p++; // p_prev: that '/', p: first character of the segment
+ if ((p_prev - pathPtr >= 6 && !strnicmp(p_prev - 6, "http://", 7)) ||
+ (p_prev - pathPtr >= 7 && !strnicmp(p_prev - 7, "https://", 8))) {
+ // the '/' is the second slash of an embedded "http://" or "https://":
+ // step back one so that the double slash appears to be kept as part
+ // of the segment -- NOTE(review): confirm intent
+ --p_prev;
+ --p;
+ } else {
+ // collapse a run of consecutive '/' in front of this segment
+ while (p_prev > pathPtr && *(p_prev - 1) == '/')
+ p_prev--;
+ }
+ }
+ }
+
+ Y_ASSERT(p_prev == nullptr || p_prev[0] == '/');
+ // and the first symbol != '/' after p_prev is p
+
+ if (p == p_rd) {
+ // empty block:
+ if (p_prev) { // either tail:
+ Y_ASSERT(p_rd == p_wr && *(p - 1) == '/');
+ --p_wr;
+ continue;
+ } else { // or head of abs path
+ *(--p_wr) = '/';
+ break;
+ }
+ }
+
+ if (p[0] == '.') {
+ if (p + 1 == p_rd) { // the segment is exactly "."
+ if (correctAbs || p_prev > pathPtr || pathPtr[0] != '/')
+ // ignore "./"
+ continue;
+ } else {
+ if ((p[1] == '.') && (p + 2 == p_rd)) { // the segment is exactly ".."
+ // register "../" but not print
+ upCount++;
+ continue;
+ }
+ }
+ }
+
+ if (upCount) {
+ // unregister "../" and not print: this segment is cancelled by a later ".."
+ upCount--;
+ continue;
+ }
+
+ // print
+ Y_ASSERT(p < p_rd);
+ Y_ASSERT(!p_prev || *(p - 1) == '/');
+ if (p_wr == p_rd) { // just skip: the segment is already where it belongs
+ p_wr = p;
+ } else { // copy
+ int l = p_rd - p + 1; // segment plus the character at p_rd that follows it
+ p_wr -= l;
+ memmove(p_wr, p, l);
+ }
+ }
+
+ if (upCount) { // ".." segments left over with nothing to cancel
+ if (*pathPtr != '/') { // relative path: keep them in front of the result
+ if (pathEnd == p_wr && *(p_wr - 1) == '.') {
+ Y_ASSERT(*(p_wr - 2) == '.');
+ p_wr -= 2;
+ upCount--;
+ }
+ for (; upCount > 0; upCount--) {
+ *(--p_wr) = '/';
+ *(--p_wr) = '.';
+ *(--p_wr) = '.';
+ }
+ } else {
+ if (correctAbs > 0)
+ return false;
+ if (correctAbs == 0) {
+ //Bad path but present in RFC:
+ // "Similarly, parsers must avoid treating "." and ".."
+ // as special when they are not complete components of
+ // a relative path. "
+ for (; upCount > 0; upCount--) { // written backwards, reads as "/.."
+ *(--p_wr) = '.';
+ *(--p_wr) = '.';
+ *(--p_wr) = '/';
+ }
+ } else {
+ upCount = false; // correctAbs < 0: silently drop the surplus ".."
+ }
+ }
+ }
+
+ Y_ASSERT(p_wr >= pathPtr);
+
+ if (upCount)
+ return false;
+ pathPtr = p_wr; // the result occupies [p_wr, pathEnd)
+ return true;
+ }
+
+ /********************************************************/
+ // Returns the enumerator name of t as a string constant. A value outside
+ // the listed enumerators trips Y_ASSERT and yields an empty string.
+ const char* LinkTypeToString(const TUri::TLinkType& t) {
+     const char* name = nullptr;
+     switch (t) {
+         case TUri::LinkIsBad:
+             name = "LinkIsBad";
+             break;
+         case TUri::LinkBadAbs:
+             name = "LinkBadAbs";
+             break;
+         case TUri::LinkIsFragment:
+             name = "LinkIsFragment";
+             break;
+         case TUri::LinkIsLocal:
+             name = "LinkIsLocal";
+             break;
+         case TUri::LinkIsGlobal:
+             name = "LinkIsGlobal";
+             break;
+     }
+     if (name)
+         return name;
+
+     Y_ASSERT(0);
+     return "";
+ }
+
+}