diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/uri/parse.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/uri/parse.h')
-rw-r--r-- | library/cpp/uri/parse.h | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/library/cpp/uri/parse.h b/library/cpp/uri/parse.h new file mode 100644 index 0000000000..ca2358e572 --- /dev/null +++ b/library/cpp/uri/parse.h @@ -0,0 +1,361 @@ +#pragma once + +// #define DO_PRN + +#include <cstddef> + +#include "common.h" + +#include <library/cpp/charset/doccodes.h> +#include <util/generic/strbuf.h> +#include <util/stream/output.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> + +namespace NUri { + class TParser; + + namespace NParse { + class TRange { + public: + const char* Beg; + ui64 FlagsEncodeMasked; + ui64 FlagsAllPlaintext; + ui32 Encode; + ui32 Decode; + + public: + TRange(const char* beg = nullptr) + : Beg(beg) + , FlagsEncodeMasked(0) + , FlagsAllPlaintext(0) + , Encode(0) + , Decode(0) + { + } + + void Reset(const char* beg = nullptr) { + *this = TRange(beg); + } + + void AddRange(const TRange& range, ui64 mask); + + void AddFlag(const char* ptr, ui64 mask, ui64 flag) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag, flag); + } + + void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag & ~exclflag, flag); + } + + void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag, flag, exclmask); + } + + void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) { + if (0 != flag) + AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask); + } + + private: + void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) { + AddFlagAllPlaintextImpl(ptr, plainflag); + AddFlagEncodeMaskedImpl(encflag & mask); + } + + void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) { + AddFlagAllPlaintextImpl(ptr, plainflag); + if (0 == (mask & exclmask)) + AddFlagEncodeMaskedImpl(encflag & mask); + } + + void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) { + if (nullptr == Beg) + Beg = ptr; + FlagsAllPlaintext |= flag; + } + + void AddFlagEncodeMaskedImpl(ui64 flag) { + if (0 == flag) + return; + FlagsEncodeMasked |= flag; + if (flag & TFeature::FeaturesMaybeEncode) + ++Encode; + else if (flag & TFeature::FeaturesDecode) + ++Decode; + } + }; + + } + + class TSection + : protected NParse::TRange { + private: + friend class TParser; + + private: + const char* End; + + TSection(const char* beg = nullptr) + : NParse::TRange(beg) + , End(nullptr) + { + } + + void Reset() { + Enter(nullptr); + } + + void Reset(const char* pc) { + Y_ASSERT(!Beg || !pc || Beg < pc); + Reset(); + } + + void Enter(const char* pc) { + *this = TSection(pc); + } + + bool Leave(const char* pc) { + Y_ASSERT(Beg); + End = pc; + return true; + } + + void Set(const TStringBuf& buf) { + Enter(buf.data()); + Leave(buf.data() + buf.length()); + } + + public: + bool IsSet() const { + return End; + } + + TStringBuf Get() const { + return TStringBuf(Beg, End); + } + + size_t Len() const { + return End - Beg; + } + + size_t DecodedLen() const { + return Len() - 2 * Decode; + } + + size_t EncodedLen() const { + return 2 * Encode + DecodedLen(); + } + + ui32 GetEncode() const { + return Encode; + } + + ui32 GetDecode() const { + return Decode; + } + + ui64 GetFlagsEncode() const { + return FlagsEncodeMasked; + } + + ui64 GetFlagsAllPlaintext() const { + return FlagsAllPlaintext; + } + }; + + class TParser { + public: + TSection Sections[TField::FieldUrlMAX]; + TScheme::EKind Scheme; + const TParseFlags Flags; + const TStringBuf UriStr; + TState::EParsed State; + ECharset Enc; + + public: + TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8) + : Scheme(TScheme::SchemeEmpty) + , Flags(flags | TFeature::FeatureDecodeANY) + , UriStr(uri) + , State(TState::ParsedEmpty) + , Enc(enc) + , HexValue(0) + , PctBegin(nullptr) + { + Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation) + // can't define all of them + || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath)); + State = ParseImpl(); + } + + public: + const TSection& Get(TField::EField fld) const { + return Sections[fld]; + } + TSection& GetMutable(TField::EField fld) { + return Sections[fld]; + } + bool Has(TField::EField fld) const { + return Get(fld).IsSet(); + } + bool IsNetPath() const { + return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1]; + } + bool IsRootless() const { + return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]); + } + // for RFC 2396 compatibility + bool IsOpaque() const { + return IsRootless(); + } + static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) { + return FieldFlags[fld] & flags; + } + ui64 GetFieldFlags(TField::EField fld) const { + return GetFieldFlags(fld, Flags); + } + + protected: + static const TParseFlags FieldFlags[TField::FieldUrlMAX]; + TSection::TRange CurRange; + unsigned HexValue; + const char* PctBegin; + +#ifdef DO_PRN + IOutputStream& PrintAddr(const char* ptr) const { + return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] "; + } + + IOutputStream& PrintHead(const char* ptr, const char* func) const { + return PrintAddr(ptr) << func << " "; + } + + IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const { + return PrintHead(ptr, func) << fld; + } + + IOutputStream& PrintTail(const TStringBuf& val) const { + return Cdbg << " [" << val << "]" << Endl; + } + IOutputStream& PrintTail(const char* beg, const char* end) const { + return PrintTail(TStringBuf(beg, end)); + } +#endif + + void ResetSection(TField::EField fld, const char* pc = nullptr) { +#ifdef DO_PRN + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); +#endif + Sections[fld].Reset(pc); + } + + void storeSection(const TStringBuf& val, TField::EField fld) { +#ifdef DO_PRN + PrintHead(val.data(), __FUNCTION__, fld); + PrintTail(val); +#endif + Sections[fld].Set(val); + } + + void startSection(const char* pc, TField::EField fld) { +#ifdef DO_PRN + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); +#endif + copyRequirements(pc); + Sections[fld].Enter(pc); + } + + void finishSection(const char* pc, TField::EField fld) { +#ifdef DO_PRN + PrintHead(pc, __FUNCTION__, fld); + PrintTail(pc); +#endif + if (Sections[fld].Leave(pc)) + copyRequirements(pc); + } + + void setRequirement(const char* ptr, ui64 flags) { +#ifdef DO_PRN + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); +#endif + CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags); + } + + void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) { +#ifdef DO_PRN + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & exclflag=" << IntToString<16>(exclflag) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); +#endif + CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag); + } + + void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) { +#ifdef DO_PRN + PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) + << " & exclmask=" << IntToString<16>(exclmask) + << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); + PrintTail(ptr); +#endif + CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask); + } + + void copyRequirementsImpl(const char* ptr); + void copyRequirements(const char* ptr) { + PctEnd(ptr); + if (nullptr != CurRange.Beg && CurRange.Beg != ptr) + copyRequirementsImpl(ptr); + } + + void HexDigit(const char* ptr, char c) { + Y_UNUSED(ptr); + HexAdd(c - '0'); + } + void HexUpper(const char* ptr, char c) { + setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded); + HexAdd(c - 'A' + 10); + } + void HexLower(const char* ptr, char c) { + setRequirement(ptr, TFeature::FeatureUpperEncoded); + HexAdd(c - 'a' + 10); + } + void HexAdd(unsigned val) { + HexValue <<= 4; + HexValue += val; + } + void HexReset() { + HexValue = 0; + } + void HexSet(const char* ptr); + + void PctEndImpl(const char* ptr); + void PctEnd(const char* ptr) { + if (nullptr != PctBegin && ptr != PctBegin) + PctEndImpl(ptr); + } + void PctBeg(const char* ptr) { + PctEnd(ptr); + HexReset(); + PctBegin = ptr; + } + + void checkSectionCollision(TField::EField fld1, TField::EField fld2) { + if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) { + Sections[fld1].Reset(); + } + } + + bool doParse(const char* str_beg, size_t length); + TState::EParsed ParseImpl(); + }; + +} |