diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/uri/encode.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/uri/encode.h')
-rw-r--r-- | library/cpp/uri/encode.h | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/library/cpp/uri/encode.h b/library/cpp/uri/encode.h new file mode 100644 index 0000000000..a9ece15427 --- /dev/null +++ b/library/cpp/uri/encode.h @@ -0,0 +1,282 @@ +#pragma once + +#include "common.h" + +#include <util/stream/output.h> + +namespace NUri { + namespace NEncode { +#define CHAR_TYPE_NAME(f) _ECT##f +#define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f) + + enum ECharType { + CHAR_TYPE_NAME(Digit), + CHAR_TYPE_NAME(Lower), + CHAR_TYPE_NAME(Upper), + CHAR_TYPE_NAME(Unres), + CHAR_TYPE_NAME(Stdrd), + }; + + enum ECharFlag { + CHAR_TYPE_FLAG(Digit), + CHAR_TYPE_FLAG(Lower), + CHAR_TYPE_FLAG(Upper), + CHAR_TYPE_FLAG(Unres), + CHAR_TYPE_FLAG(Stdrd), + // compound group flags + ECGAlpha = ECFUpper | ECFLower, + ECGAlnum = ECGAlpha | ECFDigit, + ECGUnres = ECGAlnum | ECFUnres, + ECGStdrd = ECGUnres | ECFStdrd, + }; + +#undef CHAR_TYPE_NAME +#undef CHAR_TYPE_FLAG + + struct TCharFlags { + ui32 TypeFlags; + ui64 FeatFlags; + ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed + ui32 EncodeFld; // encode if shouldn't be treated as delimiter + TCharFlags(ui64 feat = 0) + : TypeFlags(0) + , FeatFlags(feat) + , DecodeFld(0) + , EncodeFld(0) + { + } + TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) + : TypeFlags(type) + , FeatFlags(feat) + , DecodeFld(decmask) + , EncodeFld(encmask) + { + } + TCharFlags& Add(const TCharFlags& val) { + TypeFlags |= val.TypeFlags; + FeatFlags |= val.FeatFlags; + DecodeFld |= val.DecodeFld; + EncodeFld |= val.EncodeFld; + return *this; + } + bool IsAllowed(ui32 fldmask) const { + return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask); + } + // should we decode an encoded character + bool IsDecode(ui32 fldmask, ui64 flags) const; + }; + + class TEncodeMapperBase { + protected: + TEncodeMapperBase() + : Flags(0) + , FldMask(0) + , Q_DecodeAny(false) + { + } + TEncodeMapperBase(ui64 flags, TField::EField fld) + : Flags(flags) + , FldMask(1u << fld) + , Q_DecodeAny(flags & TFeature::FeatureDecodeANY) + { + } + + protected: + const ui64 Flags; + const ui32 FldMask; + const bool Q_DecodeAny; // this is a special option for username/password + }; + + // maps a sym or hex character and indicates whether it has to be encoded + class TEncodeMapper + : public TEncodeMapperBase { + public: + TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) + : TEncodeMapperBase(flags, fld) + , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus) + { + } + // negative=sym, positive=hex, zero=maybesym + int EncodeSym(unsigned char&) const; + int EncodeHex(unsigned char&) const; + + protected: + const bool Q_EncodeSpcAsPlus; + }; + + // indicates whether a character has to be encoded when copying to a field + class TEncodeToMapper + : public TEncodeMapperBase { + public: + TEncodeToMapper() + : TEncodeMapperBase() + { + } + TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX) + : TEncodeMapperBase(flags, fld) + { + } + bool Enabled() const { + return 0 != FldMask; + } + bool Encode(unsigned char) const; + }; + + class TEncoder { + public: + TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper()); + + ui64 ReEncode(const TStringBuf& url); + ui64 ReEncode(const char* str, size_t len) { + return ReEncode(TStringBuf(str, len)); + } + + protected: + static bool IsType(unsigned char c, ui64 flags) { + return GetFlags(c).TypeFlags & flags; + } + + public: + static bool IsDigit(unsigned char c) { + return IsType(c, ECFDigit); + } + static bool IsUpper(unsigned char c) { + return IsType(c, ECFUpper); + } + static bool IsLower(unsigned char c) { + return IsType(c, ECFLower); + } + static bool IsAlpha(unsigned char c) { + return IsType(c, ECGAlpha); + } + static bool IsAlnum(unsigned char c) { + return IsType(c, ECGAlnum); + } + static bool IsUnres(unsigned char c) { + return IsType(c, ECGUnres); + } + static const TCharFlags& GetFlags(unsigned char c) { + return Grammar().Get(c); + } + + public: + // process an encoded string, decoding safe chars and encoding unsafe + static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) { + TEncoder(out, srcfld).ReEncode(val); + return out; + } + static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) { + TEncoder(out, srcfld, dstfld).ReEncode(val); + return out; + } + + // see also UrlUnescape() from string/quote.h + static IOutputStream& Decode( + IOutputStream& out, const TStringBuf& val, ui64 flags) { + return ReEncode(out, val, flags | TFeature::FeatureDecodeANY); + } + + public: + // process a raw string or char, encode as needed + static IOutputStream& Hex(IOutputStream& out, unsigned char val); + static IOutputStream& Encode(IOutputStream& out, unsigned char val) { + out << '%'; + return Hex(out, val); + } + static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val); + static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val); + + static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld); + static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags); + + static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) { + return EncodeField(out, val, TField::FieldAllMAX); + } + + static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) { + return EncodeField(out, val, TField::FieldAllMAX, flags); + } + + public: + class TGrammar { + TCharFlags Map_[256]; + + public: + TGrammar(); + const TCharFlags& Get(unsigned char ch) const { + return Map_[ch]; + } + + TCharFlags& GetMutable(unsigned char ch) { + return Map_[ch]; + } + TCharFlags& Add(unsigned char ch, const TCharFlags& val) { + return GetMutable(ch).Add(val); + } + + void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) { + for (unsigned i = lo; i <= hi; ++i) + Add(i, val); + } + void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { + AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask)); + } + + void Add(const TStringBuf& set, const TCharFlags& val) { + for (size_t i = 0; i != set.length(); ++i) + Add(set[i], val); + } + void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) { + Add(set, TCharFlags(type, feat, decmask, encmask)); + } + }; + + static const TGrammar& Grammar(); + + protected: + IOutputStream& Out; + const TEncodeMapper FldSrc; + const TEncodeToMapper FldDst; + ui64 OutFlags; + int HexValue; + + protected: + void HexReset() { + HexValue = 0; + } + + void HexDigit(char c) { + HexAdd(c - '0'); + } + void HexUpper(char c) { + HexAdd(c - 'A' + 10); + } + void HexLower(char c) { + HexAdd(c - 'a' + 10); + } + + void HexAdd(int val) { + HexValue <<= 4; + HexValue += val; + } + + protected: + void DoSym(unsigned char ch) { + const int res = FldSrc.EncodeSym(ch); + Do(ch, res); + } + void DoHex(unsigned char ch) { + const int res = FldSrc.EncodeHex(ch); + Do(ch, res); + } + void DoHex() { + DoHex(HexValue); + HexValue = 0; + } + void Do(unsigned char, int); + }; + } + + using TEncoder = NEncode::TEncoder; + +} |