#pragma once
#include "common.h"
#include "encode.h"
#include <library/cpp/charset/doccodes.h>
#include <util/generic/buffer.h>
#include <util/generic/ptr.h>
#include <util/generic/singleton.h>
#include <util/generic/string.h>
#include <util/memory/alloc.h>
#include <util/stream/mem.h>
#include <util/stream/output.h>
#include <util/stream/str.h>
#include <util/system/yassert.h>
#include <cstdlib>
namespace NUri {
/********************************************************/
class TUri
: public TFeature,
public TField,
public TScheme,
public TState {
public:
/// Classification of a link relative to another URI.
/// Locality() returns LinkIsFragment, LinkIsLocal or LinkIsGlobal;
/// Normalize() is declared to return this type as well.
/// NOTE(review): LinkIsBad / LinkBadAbs are presumably error results of Normalize() — confirm in the implementation.
enum TLinkType {
LinkIsBad,
LinkBadAbs,
LinkIsFragment, // Locality(): the two URIs compare equal under FlagNoFrag
LinkIsLocal, // Locality(): scheme and host/port compare equal
LinkIsGlobal // Locality(): neither of the above
};
private:
// Backing storage; resized by Alloc(), cleared by Dealloc(), range-tested by IsInBuffer().
TBuffer Buffer;
// Per-field values, indexed by EField.
TStringBuf Fields[FieldAllMAX];
// Bit mask (bits produced by FldFlag()) of the fields that currently hold a value.
ui32 FieldsSet;
// Explicit port; when 0, GetPort() falls back to DefaultPort.
ui16 Port;
// Fallback port; set by the constructors, CopyData() and SetSchemeImpl().
ui16 DefaultPort;
// Scheme kind; SchemeEmpty after construction and after ClearImpl().
TScheme::EKind Scheme;
/// contains fields out of buffer (and possibly not null-terminated)
ui32 FieldsDirty;
private:
/// Empties the buffer and then resizes it to @p len bytes.
/// Clearing first prevents the resize from copying the previous contents.
void Alloc(size_t len) {
    Buffer.Clear();
    Buffer.Resize(len);
}
/// Drops the buffer contents.
void Dealloc() {
    Buffer.Clear();
}
/// Resets the bookkeeping: no field set, no field dirty, no explicit port, empty scheme.
/// DefaultPort, the Fields array and Buffer are not touched here.
void ClearImpl() {
    FieldsDirty = 0;
    FieldsSet = 0;
    Scheme = SchemeEmpty;
    Port = 0;
}
/// Copies the scalar state (masks, ports, scheme kind) from @p url.
/// Field values and buffer contents are not copied here; see CopyImpl().
void CopyData(const TUri& url) {
    Scheme = url.Scheme;
    DefaultPort = url.DefaultPort;
    Port = url.Port;
    FieldsDirty = url.FieldsDirty;
    FieldsSet = url.FieldsSet;
}
/// Copies every field slice from @p url, then calls RewriteImpl().
/// NOTE(review): RewriteImpl() presumably moves the slices into this object's own Buffer — confirm in the implementation.
void CopyImpl(const TUri& url) {
    int idx = 0;
    while (idx < FieldAllMAX) {
        Fields[idx] = url.Fields[idx];
        ++idx;
    }
    RewriteImpl();
}
private:
/// Returns the single-bit mask that represents @p fld in FieldsSet / FieldsDirty.
/// The shift operates on an unsigned 32-bit one, so every bit position of the
/// ui32 masks (including the top bit) is well defined; a plain `1 << fld`
/// would shift a signed int.
/// @param fld field index; must be non-negative and below 32.
static ui32 FldFlag(EField fld) {
    return static_cast<ui32>(1) << fld;
}
public:
/// True when @p fld lies in [0, FieldAllMAX), i.e. can index the Fields array.
static bool FldIsValid(EField fld) {
    return fld >= 0 && fld < FieldAllMAX;
}
/// True when the set-field bits selected by @p chk are exactly @p exp.
bool FldSetCmp(ui32 chk, ui32 exp) const {
    const ui32 selected = FieldsSet & chk;
    return selected == exp;
}
/// True when every field in @p chk is set.
bool FldSetCmp(ui32 chk) const {
    return (FieldsSet & chk) == chk;
}
/// True when @p fld currently holds a value.
bool FldIsSet(EField fld) const {
    return (FieldsSet & FldFlag(fld)) != 0;
}
private:
/// Raises the "set" bit of @p fld.
void FldMarkSet(EField fld) {
    const ui32 bit = FldFlag(fld);
    FieldsSet = FieldsSet | bit;
}
/// Lowers the "set" bit of @p fld.
void FldMarkUnset(EField fld) {
    const ui32 bit = FldFlag(fld);
    FieldsSet = FieldsSet & ~bit;
}
// use when we know the field is dirty or RewriteImpl will be called
/// Stores @p value in the slot of @p fld and marks it set; the dirty mask is left alone.
void FldSetNoDirty(EField fld, const TStringBuf& value) {
    FldMarkSet(fld);
    Fields[fld] = value;
}
/// Stores @p value in the slot of @p fld, marks it set and marks it dirty.
void FldSet(EField fld, const TStringBuf& value) {
    Fields[fld] = value;
    FldMarkSet(fld);
    FldMarkDirty(fld);
}
/// Raw slot access: no validity or "is set" check (GetField() is the checked variant).
const TStringBuf& FldGet(EField fld) const {
    const TStringBuf& slot = Fields[fld];
    return slot;
}
private:
/// depending on value, clears or sets it:
/// an initialized @p value is stored (and marked dirty), an uninitialized one clears the field
void FldChkSet(EField fld, const TStringBuf& value) {
    if (!value.IsInited()) {
        FldClr(fld);
        return;
    }
    FldSet(fld, value);
}
/// Same, taking the value of field @p fld from @p other.
void FldChkSet(EField fld, const TUri& other) {
    const TStringBuf& theirs = other.GetField(fld);
    FldChkSet(fld, theirs);
}
/// set only if initialized
/// @return true when @p value was initialized and has been stored
bool FldTrySet(EField fld, const TStringBuf& value) {
    if (!value.IsInited())
        return false;
    FldSet(fld, value);
    return true;
}
/// Same, taking the value of field @p fld from @p other.
bool FldTrySet(EField fld, const TUri& other) {
    const TStringBuf& theirs = other.GetField(fld);
    return FldTrySet(fld, theirs);
}
private:
/// copies the value if it fits
/// (defined out of line; NOTE(review): "fits" presumably refers to the space available in Buffer — confirm)
bool FldTryCpy(EField fld, const TStringBuf& value);
// main method: sets the field value, possibly copies, etc.
// The public wrappers in this class call it as:
//   FldMemCpy -> strconst = false, nocopy = false
//   FldMemSet -> strconst = false, nocopy = true
//   FldMemUse and the array overload of FldMemSet -> strconst = true, nocopy = false
bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false);
public: // clear a field
/// Clears field @p fld: empties its slot and drops both its "set" and "dirty" bits.
/// An out-of-range @p fld (anything FldIsValid() rejects, e.g. FieldAllMAX) is
/// ignored, so this public entry point never indexes outside the Fields array.
void FldClr(EField fld) {
    if (!FldIsValid(fld))
        return;
    Fields[fld].Clear();
    FldMarkUnset(fld);
    FldMarkClean(fld);
}
/// Clears @p field only when it is currently set.
/// @return true when the field was set (and has now been cleared)
bool FldTryClr(EField field) {
    if (!FldIsSet(field))
        return false;
    FldClr(field);
    return true;
}
public: // set a field value: might leave state dirty and require a Rewrite()
// copies if fits and not dirty, sets and marks dirty otherwise
bool FldMemCpy(EField field, const TStringBuf& value) {
    return FldSetImpl(field, value, /*strconst=*/false, /*nocopy=*/false);
}
// uses directly, marks dirty
/// @note client MUST guarantee value will be alive until Rewrite is called
bool FldMemSet(EField field, const TStringBuf& value) {
    return FldSetImpl(field, value, /*strconst=*/false, /*nocopy=*/true);
}
// uses directly, doesn't mark dirty (value scope exceeds "this")
bool FldMemUse(EField field, const TStringBuf& value) {
    return FldSetImpl(field, value, /*strconst=*/true, /*nocopy=*/false);
}
// uses directly, doesn't mark dirty
// The last array element (the terminator of a string literal) is not part of the value.
template <size_t size>
bool FldMemSet(EField field, const char (&value)[size]) {
    static_assert(size > 0);
    const TStringBuf literal(value, size - 1);
    return FldSetImpl(field, literal, /*strconst=*/true, /*nocopy=*/false);
}
// duplicate one field to another
/// Makes @p dst refer to the same value as @p src and gives it the same dirty state.
/// Both indices are range-checked before use (as GetField() does), so an
/// out-of-range @p src no longer reaches FldFlag() / Fields[].
/// @return false when either index is invalid or @p src is not set; true otherwise
bool FldDup(EField src, EField dst) {
    if (!FldIsValid(src) || !FldIsValid(dst) || !FldIsSet(src))
        return false;
    FldSetNoDirty(dst, FldGet(src));
    // dst shares src's storage, so it must share its dirty state as well
    if (FldIsDirty(src))
        FldMarkDirty(dst);
    else
        FldMarkClean(dst);
    return true;
}
// move one field to another
/// Duplicates @p src into @p dst and then clears @p src.
/// Moving a field onto itself is a no-op: without this guard the
/// duplicate-then-clear sequence would wipe the value.
/// @return true when the value now lives in @p dst; false when the move was not possible
bool FldMov(EField src, EField dst) {
    if (src == dst)
        return FldIsValid(src) && FldIsSet(src);
    if (!FldDup(src, dst))
        return false;
    FldClr(src);
    return true;
}
private:
/// True when @p buf points into [Buffer.data(), Buffer.data() + Buffer.size()).
bool IsInBuffer(const char* buf) const {
    const char* const first = Buffer.data();
    const char* const last = first + Buffer.size();
    return first <= buf && buf < last;
}
public:
/// True when at least one field is marked dirty.
bool FldIsDirty() const {
    return FieldsDirty != 0;
}
/// True when @p fld is marked dirty.
bool FldIsDirty(EField fld) const {
    const ui32 bit = FldFlag(fld);
    return (FieldsDirty & bit) != 0;
}
private:
/// Raises the "dirty" bit of @p fld.
void FldMarkDirty(EField fld) {
    const ui32 bit = FldFlag(fld);
    FieldsDirty = FieldsDirty | bit;
}
/// Lowers the "dirty" bit of @p fld.
void FldMarkClean(EField fld) {
    const ui32 bit = FldFlag(fld);
    FieldsDirty = FieldsDirty & ~bit;
}
// Defined out of line. Called unconditionally by CopyImpl() and by Rewrite() when any field is dirty.
// NOTE(review): presumably re-packs all field values into Buffer and clears the dirty mask — confirm in the implementation.
void RewriteImpl();
public:
// Validates a host string and reports the outcome as a parse status (defined out of line).
static TState::EParsed CheckHost(const TStringBuf& host);
// convert a [potential] IDN to ascii
// The result is returned in a TMallocPtr<char>.
static TMallocPtr<char> IDNToAscii(const wchar32* idna);
// Overload taking the host as a string in encoding `enc` (UTF-8 by default).
static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8);
// convert hosts with percent-encoded or extended chars
// returns non-empty string if host can be converted to ASCII with given parameters
// NOTE(review): `buf` presumably owns the storage of a converted result — confirm in the implementation.
static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8);
// returns host if already ascii, or non-empty if it can be converted
static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8);
public:
/// Creates an empty URI: no fields set or dirty, no explicit port, empty scheme.
/// @param defaultPort port reported by GetPort() while no explicit port is set (narrowed to ui16)
explicit TUri(unsigned defaultPort = 0)
    : FieldsSet{0}
    , Port{0}
    , DefaultPort{static_cast<ui16>(defaultPort)}
    , Scheme{SchemeEmpty}
    , FieldsDirty{0}
{
}
// Builds a URI from its individual components (defined out of line).
// Defaults: uninitialized query and hashbang, scheme "http", defaultPort 0.
TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0, const TStringBuf& hashbang = TStringBuf());
/// Copy constructor: takes the scalar state from @p url, then copies the
/// field slices and calls RewriteImpl() through CopyImpl().
TUri(const TUri& url)
    : FieldsSet{url.FieldsSet}
    , Port{url.Port}
    , DefaultPort{url.DefaultPort}
    , Scheme{url.Scheme}
    , FieldsDirty{url.FieldsDirty}
{
    CopyImpl(url);
}
/// Clears the buffer and resets the bookkeeping (the same steps as Clear()).
~TUri() {
    Dealloc();
    ClearImpl();
}
/// Makes this object a copy of @p url; copying an object onto itself does nothing.
void Copy(const TUri& url) {
    if (this == &url)
        return;
    CopyData(url);
    CopyImpl(url);
}
void Clear() {
Dealloc();
ClearImpl();
}
/// Bit mask of all fields that currently hold a value.
ui32 GetFieldMask() const {
    return FieldsSet;
}
/// The set-field mask restricted to the bits in FlagUrlFields.
ui32 GetUrlFieldMask() const {
    return FieldsSet & FlagUrlFields;
}
/// Bit mask of the fields currently marked dirty.
ui32 GetDirtyMask() const {
    return FieldsDirty;
}
// Defined out of line.
// NOTE(review): presumably fills in or validates fields that parsing left unset — confirm in the implementation.
void CheckMissingFields();
// Process methods
/// Calls RewriteImpl() when at least one field is dirty; otherwise does nothing.
void Rewrite() {
    if (!FldIsDirty())
        return;
    RewriteImpl();
}
private:
// Worker behind Assign(): takes the result of an already-run parser (defined out of line).
// Assign() calls Rewrite() afterwards when this returns ParsedOK.
TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty);
// Worker behind ParseUri(): parses `url` (defined out of line).
// ParseUri() calls Rewrite() afterwards when this returns ParsedOK.
// NOTE(review): `maxlen = 0` presumably means "no length limit" — confirm in the implementation.
TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8);
public:
/// Assigns from an already-run parser via AssignImpl(); on ParsedOK any dirty
/// fields are rewritten. The status from AssignImpl() is returned unchanged.
TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) {
    const TState::EParsed status = AssignImpl(parser, defscheme);
    if (status != ParsedOK)
        return status;
    Rewrite();
    return status;
}
/// Parses @p url via ParseImpl() with no default scheme (SchemeEmpty); on ParsedOK
/// any dirty fields are rewritten. The status from ParseImpl() is returned unchanged.
TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
    const TState::EParsed status = ParseImpl(url, flags, maxlen, SchemeEmpty, enc);
    if (status != ParsedOK)
        return status;
    Rewrite();
    return status;
}
// parses absolute URIs
// prepends default scheme (unless unknown) if URI has none
// Defined out of line; `defscheme` defaults to SchemeUnknown.
TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8);
/// Same as ParseAbsUri() with SchemeHTTP passed as the default scheme.
TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
    const TScheme::EKind fallback = SchemeHTTP;
    return ParseAbsUri(url, flags, maxlen, fallback, enc);
}
// Parses `url` given an already-parsed `base` URI (defined out of line).
// NOTE(review): presumably resolves a relative `url` against `base` — confirm in the implementation.
TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8);
/// Shorthand for ParseUri() with no length limit argument (0) and UTF-8 encoding.
TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) {
    return ParseUri(url, flags, 0, CODES_UTF8);
}
// Parses `url` given a base URL supplied as text (defined out of line); used by ParseAbs().
// NOTE(review): presumably parses `base_url` first and resolves `url` against it — confirm in the implementation.
TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8);
/// Parses @p url against @p base_url and additionally requires the result to be a
/// valid global URI.
/// @return the parser status when parsing failed; ParsedOK when parsing succeeded and
///         IsValidGlobal() holds; ParsedBadFormat when parsing succeeded but it does not
TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
    const TState::EParsed status = Parse(url, flags, base_url, maxlen, enc);
    if (status != ParsedOK)
        return status;
    if (IsValidGlobal())
        return status;
    return ParsedBadFormat;
}
// correctAbs works with head "/.." portions:
// 1 - reject URL
// 0 - keep portions
// -1 - ignore portions
// (PathOperationFlag() produces these same three values from parse flags.)
// Defined out of line. NOTE(review): presumably resolves this URI against `base` — confirm in the implementation.
void Merge(const TUri& base, int correctAbs = -1);
// Defined out of line; reports the outcome as a TLinkType.
// NOTE(review): presumably parses `link` relative to `base` (and `codebase`, when given) into this object — confirm.
TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), ui64 careFlags = FeaturesDefault, ECharset enc = CODES_UTF8);
private:
/// Normalizes print flags: when @p flags selects none of the FlagUrlFields bits,
/// all of them are switched on; otherwise @p flags is returned as is.
int PrintFlags(int flags) const {
    if ((flags & FlagUrlFields) != 0)
        return flags;
    flags |= FlagUrlFields;
    return flags;
}
protected:
// Size of the printed form for the given flags (defined out of line).
// Callers in this class add 1 to it for the terminating '\0'.
size_t PrintSize(ui32 flags) const;
// Output method, prints to stream
// Returns the stream so that callers can keep writing to it.
IOutputStream& PrintImpl(IOutputStream& out, int flags) const;
char* PrintImpl(char* str, size_t size, int flags) const {
TMemoryOutput out(str, size);
PrintImpl(out, flags) << '\0';
return str;
}
static bool IsAbsPath(const TStringBuf& path) {
return 1 <= path.length() && path[0] == '/';
}
bool IsAbsPathImpl() const {
return IsAbsPath(GetField(FieldPath));
}
public:
// Output method, prints to stream
/// @param flags fields to print; normalized through PrintFlags() first
/// @return @p out
IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const {
    const int effective = PrintFlags(flags);
    return PrintImpl(out, effective);
}
// Output method, print to str, allocate memory if str is NULL
// Should be deprecated
/// Dispatches to Serialize(str, size, flags) for a caller-provided buffer and to
/// the allocating Serialize(flags) when @p str is null.
char* Print(char* str, size_t size, int flags = FlagUrlFields) const {
    if (str != nullptr)
        return Serialize(str, size, flags);
    return Serialize(flags);
}
/// Prints the URI, '\0'-terminated, into the caller-provided buffer.
/// @param str   destination buffer; must not be null
/// @param size  capacity of @p str in bytes, terminator included
/// @param flags fields to print; normalized through PrintFlags()
/// @return @p str on success; nullptr when @p str is null or the buffer is too
///         small for the printed form plus its terminator
char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const {
    Y_ASSERT(str);
    // Y_ASSERT is a debug-only check; fail softly instead of writing through a null pointer.
    if (nullptr == str)
        return nullptr;
    flags = PrintFlags(flags);
    const size_t printSize = PrintSize(flags) + 1;
    return printSize > size ? nullptr : PrintImpl(str, size, flags);
}
/// Prints the URI, '\0'-terminated, into a freshly malloc()-ed buffer of exactly
/// the required size.
/// @param flags fields to print; normalized through PrintFlags()
/// @return the new buffer, which the caller must release with free();
///         nullptr when the allocation fails
char* Serialize(int flags = FlagUrlFields) const {
    flags = PrintFlags(flags);
    const size_t size = PrintSize(flags) + 1;
    char* const buf = static_cast<char*>(malloc(size));
    // malloc may fail; never hand a null buffer to the printer
    if (nullptr == buf)
        return nullptr;
    return PrintImpl(buf, size, flags);
}
// Output method to str
/// Appends the printed URI to @p str, reserving the required capacity up front.
void Print(TString& str, int flags = FlagUrlFields) const {
    const int effective = PrintFlags(flags);
    const size_t wanted = str.length() + PrintSize(effective);
    str.reserve(wanted);
    TStringOutput sink(str);
    PrintImpl(sink, effective);
}
/// Returns the printed URI as a new string.
TString PrintS(int flags = FlagUrlFields) const {
    TString printed;
    Print(printed, flags);
    return printed;
}
// Only non-default scheme and port are printed
/// Prints host and port, preceded by the scheme unless the scheme is HTTP.
/// Buffer handling is that of Print(char*, size_t, int).
char* PrintHost(char* str, size_t size) const {
    int flags = FlagHostPort;
    if (Scheme != SchemeHTTP)
        flags |= FlagScheme;
    return Print(str, size, flags);
}
/// String-returning variant of PrintHost().
TString PrintHostS() const {
    int flags = FlagHostPort;
    if (Scheme != SchemeHTTP)
        flags |= FlagScheme;
    return PrintS(flags);
}
// Info methods
// Three-way comparison with A over the fields selected by `flags` (defined out of line).
// Equal() tests the result against 0 with `==`, Less() with `<`.
int Compare(const TUri& A, int flags = FlagUrlFields) const;
// Comparison of a single field with the same field of `url` (defined out of line).
int CompareField(EField fld, const TUri& url) const;
/// Checked field access.
/// @return the field value, or the shared default TStringBuf when @p fld is
///         out of range or not set
const TStringBuf& GetField(EField fld) const {
    if (!FldIsValid(fld) || !FldIsSet(fld))
        return Default<TStringBuf>();
    return FldGet(fld);
}
/// The explicit port when one is set (non-zero), otherwise DefaultPort.
ui16 GetPort() const {
    return Port != 0 ? Port : DefaultPort;
}
/// Returns the ASCII host when that field is set, else the plain host when set,
/// else the shared default TStringBuf.
const TStringBuf& GetHost() const {
    const ui32 present = GetFieldMask();
    if (present & FlagHostAscii)
        return FldGet(FieldHostAscii);
    if (present & FlagHost)
        return FldGet(FieldHost);
    return Default<TStringBuf>();
}
/// Moves the ASCII host field into the host field (see FldMov()).
/// @return the result of FldMov(), i.e. false when no ASCII host is set
bool UseHostAscii() {
    const bool moved = FldMov(FieldHostAscii, FieldHost);
    return moved;
}
/// The scheme kind currently recorded for this URI.
TScheme::EKind GetScheme() const {
    return Scheme;
}
/// The TSchemeInfo entry for the current scheme kind.
const TSchemeInfo& GetSchemeInfo() const {
    return TSchemeInfo::Get(GetScheme());
}
/// True when at least one of the fields in @p flags is not set
/// (by default: scheme, host, path).
bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const {
    return (FieldsSet & flags) != flags;
}
/// True when field @p fld is not set.
bool IsNull(EField fld) const {
    return (FieldsSet & FldFlag(fld)) == 0;
}
/// True when scheme, host and path are all set and the path starts with '/'.
bool IsValidAbs() const {
    return !IsNull(FlagScheme | FlagHost | FlagPath) && IsAbsPathImpl();
}
/// True when scheme and host are set and the path is either unset or starts with '/'.
bool IsValidGlobal() const {
    if (IsNull(FlagScheme | FlagHost))
        return false;
    return IsNull(FlagPath) || IsAbsPathImpl();
}
bool IsRootless() const {
return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl();
}
// for RFC 2396 compatibility
bool IsOpaque() const {
return IsRootless();
}
// Inline helpers
/// Copy assignment; delegates to Copy(), which ignores self-assignment.
TUri& operator=(const TUri& u) {
    Copy(u);
    return *this;
}
/// True when scheme, host and path are not all set (IsNull() with its default mask).
bool operator!() const {
    return IsNull(FlagScheme | FlagHost | FlagPath);
}
/// True when Compare() over the fields in @p flags yields 0.
bool Equal(const TUri& A, int flags = FlagUrlFields) const {
    const int order = Compare(A, flags);
    return order == 0;
}
/// True when Compare() over the fields in @p flags yields a negative value.
bool Less(const TUri& A, int flags = FlagUrlFields) const {
    const int order = Compare(A, flags);
    return order < 0;
}
/// Equality under FlagNoFrag.
bool operator==(const TUri& A) const {
    return Compare(A, FlagNoFrag) == 0;
}
/// Inequality under FlagNoFrag.
bool operator!=(const TUri& A) const {
    return Compare(A, FlagNoFrag) != 0;
}
/// Ordering under FlagNoFrag.
bool operator<(const TUri& A) const {
    return Compare(A, FlagNoFrag) < 0;
}
bool IsSameDocument(const TUri& other) const {
// pre: both *this and 'other' should be normalized to valid abs
Y_ASSERT(IsValidAbs());
return Equal(other, FlagNoFrag);
}
bool IsLocal(const TUri& other) const {
// pre: both *this and 'other' should be normalized to valid abs
Y_ASSERT(IsValidAbs() && other.IsValidAbs());
return Equal(other, FlagScheme | FlagHostPort);
}
/// Classifies @p other relative to this URI:
/// LinkIsFragment for the same document, LinkIsLocal for the same scheme and
/// host/port, LinkIsGlobal otherwise.
TLinkType Locality(const TUri& other) const {
    if (IsSameDocument(other))
        return LinkIsFragment;
    return IsLocal(other) ? LinkIsLocal : LinkIsGlobal;
}
// Re-encodes `val` to `out` through NEncode::TEncoder::ReEncode, using a mapper
// built from `flags` and the field `fld`; returns what the encoder returns.
static IOutputStream& ReEncodeField(IOutputStream& out, const TStringBuf& val, EField fld, ui64 flags = FeaturesEncodeDecode) {
return NEncode::TEncoder::ReEncode(out, val, NEncode::TEncodeMapper(flags, fld));
}
// Re-encodes `val` to `out` through NEncode::TEncoder::ReEncodeTo, with separate
// mappers for the source field/flags and the destination field/flags.
static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, ui64 srcflags, EField dstfld, ui64 dstflags) {
return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld));
}
// ReEncodeField() with FieldAllMAX passed as the field.
static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, ui64 flags = FeaturesEncodeDecode) {
return ReEncodeField(out, val, FieldAllMAX, flags);
}
/// Maps parse flags to a three-state int:
/// 1 when FeaturePathDenyRootParent is set, else -1 when
/// FeaturePathStripRootParent is set, else 0.
static int PathOperationFlag(const TParseFlags& flags) {
    if (flags & FeaturePathDenyRootParent)
        return 1;
    if (flags & FeaturePathStripRootParent)
        return -1;
    return 0;
}
// Defined out of line. Takes the path range by reference, so it may move both ends.
// NOTE(review): `correctAbs` presumably has the 1 / 0 / -1 meaning documented for Merge() — confirm.
static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs);
private:
/// Adopts the scheme kind and default port from @p info; when @p info has a
/// non-empty name, stores it in the scheme field without touching the dirty mask.
/// @return @p info
const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) {
    DefaultPort = info.Port;
    Scheme = info.Kind;
    const bool unnamed = info.Str.empty();
    if (!unnamed)
        FldSetNoDirty(FieldScheme, info.Str);
    return info;
}
/// Same, looking the info up by scheme kind.
const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) {
    const TSchemeInfo& info = TSchemeInfo::Get(scheme);
    return SetSchemeImpl(info);
}
public:
/// Public scheme setter: applies SetSchemeImpl() and, when @p info has a
/// non-empty name, marks the scheme field clean.
/// @return @p info
const TSchemeInfo& SetScheme(const TSchemeInfo& info) {
    SetSchemeImpl(info);
    const bool unnamed = info.Str.empty();
    if (!unnamed)
        FldMarkClean(FieldScheme);
    return info;
}
/// Same, looking the info up by scheme kind.
const TSchemeInfo& SetScheme(TScheme::EKind scheme) {
    const TSchemeInfo& info = TSchemeInfo::Get(scheme);
    return SetScheme(info);
}
};
class TUriUpdate {
TUri& Uri_;
public:
TUriUpdate(TUri& uri)
: Uri_(uri)
{
}
~TUriUpdate() {
Uri_.Rewrite();
}
public:
bool Set(TField::EField field, const TStringBuf& value) {
return Uri_.FldMemSet(field, value);
}
template <size_t size>
bool Set(TField::EField field, const char (&value)[size]) {
return Uri_.FldMemSet(field, value);
}
void Clr(TField::EField field) {
Uri_.FldClr(field);
}
};
// Returns a C-string name for a TUri::TLinkType value (defined out of line);
// used by the stream output specialization for TLinkType at the end of this file.
const char* LinkTypeToString(const TUri::TLinkType& t);
}
// Stream output for NUri::TUri: prints the URI with Print()'s default flags.
Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) {
url.Print(out);
}
// Stream output for NUri::TUri::TLinkType: writes the name returned by LinkTypeToString().
Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) {
out << NUri::LinkTypeToString(t);
}