aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/uri/encode.h
diff options
context:
space:
mode:
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/uri/encode.h
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/uri/encode.h')
-rw-r--r--library/cpp/uri/encode.h282
1 files changed, 282 insertions, 0 deletions
diff --git a/library/cpp/uri/encode.h b/library/cpp/uri/encode.h
new file mode 100644
index 0000000000..a9ece15427
--- /dev/null
+++ b/library/cpp/uri/encode.h
@@ -0,0 +1,282 @@
+#pragma once
+
+#include "common.h"
+
+#include <util/stream/output.h>
+
+namespace NUri {
+ namespace NEncode {
+ // Bit positions of the basic character classes. The enumerators are
+ // numbered implicitly from zero and are used as shift amounts by ECharFlag.
+ enum ECharType {
+     _ECTDigit,
+     _ECTLower,
+     _ECTUpper,
+     _ECTUnres,
+     _ECTStdrd,
+ };
+
+ // Bit masks of the character classes: ECF<X> equals 1u << _ECT<X>.
+ enum ECharFlag {
+     ECFDigit = 1u << _ECTDigit,
+     ECFLower = 1u << _ECTLower,
+     ECFUpper = 1u << _ECTUpper,
+     ECFUnres = 1u << _ECTUnres,
+     ECFStdrd = 1u << _ECTStdrd,
+     // compound group flags: each group is the previous one plus one more class
+     ECGAlpha = ECFUpper | ECFLower,
+     ECGAlnum = ECGAlpha | ECFDigit,
+     ECGUnres = ECGAlnum | ECFUnres,
+     ECGStdrd = ECGUnres | ECFStdrd,
+ };
+
+ // Per-character attribute record: a class bit set (ECharFlag values), a
+ // 64-bit feature bit set and two 32-bit masks that IsAllowed() tests against
+ // a field mask.
+ struct TCharFlags {
+     ui32 TypeFlags;
+     ui64 FeatFlags;
+     ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed
+     ui32 EncodeFld; // encode if shouldn't be treated as delimiter
+
+     // Feature-only record; the type flags and both field masks start at zero.
+     // Also serves as the default constructor.
+     TCharFlags(ui64 feat = 0)
+         : TCharFlags(0u, feat)
+     {
+     }
+
+     // Fully specified record; the two field masks default to zero.
+     TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0)
+         : TypeFlags(type)
+         , FeatFlags(feat)
+         , DecodeFld(decmask)
+         , EncodeFld(encmask)
+     {
+     }
+
+     // ORs every member of val into this record and returns *this for chaining.
+     TCharFlags& Add(const TCharFlags& val) {
+         EncodeFld |= val.EncodeFld;
+         DecodeFld |= val.DecodeFld;
+         FeatFlags |= val.FeatFlags;
+         TypeFlags |= val.TypeFlags;
+         return *this;
+     }
+
+     // True when the character carries any ECGUnres class bit, or when
+     // fldmask selects a bit that is set in DecodeFld and clear in EncodeFld.
+     bool IsAllowed(ui32 fldmask) const {
+         if (TypeFlags & ECGUnres) {
+             return true;
+         }
+         const ui32 decodeOnly = DecodeFld & ~EncodeFld;
+         return 0 != (decodeOnly & fldmask);
+     }
+
+     // should we decode an encoded character
+     bool IsDecode(ui32 fldmask, ui64 flags) const;
+ };
+
+ // Shared state of the encode mappers: the feature flags, a one-bit field
+ // mask and a cached "decode any" switch. Constructible by derived classes only.
+ class TEncodeMapperBase {
+ protected:
+     const ui64 Flags;
+     const ui32 FldMask;
+     const bool Q_DecodeAny; // this is a special option for username/password
+
+     // Empty mapper: no flags and a zero field mask.
+     TEncodeMapperBase()
+         : Flags(0), FldMask(0), Q_DecodeAny(false)
+     {
+     }
+
+     // Mapper for field fld: the mask has the single bit number fld set, and
+     // Q_DecodeAny caches whether FeatureDecodeANY is present in flags.
+     TEncodeMapperBase(ui64 flags, TField::EField fld)
+         : Flags(flags), FldMask(1u << fld), Q_DecodeAny(flags & TFeature::FeatureDecodeANY)
+     {
+     }
+ };
+
+ // maps a sym or hex character and indicates whether it has to be encoded
+ class TEncodeMapper: public TEncodeMapperBase {
+ protected:
+     // Cached presence of FeatureEncodeSpaceAsPlus in the constructor flags.
+     const bool Q_EncodeSpcAsPlus;
+
+ public:
+     // Intentionally implicit; fld defaults to TField::FieldAllMAX.
+     TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
+         : TEncodeMapperBase(flags, fld), Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus)
+     {
+     }
+
+     // Both take the character by reference.
+     // Result: negative=sym, positive=hex, zero=maybesym
+     int EncodeSym(unsigned char&) const;
+     int EncodeHex(unsigned char&) const;
+ };
+
+ // indicates whether a character has to be encoded when copying to a field
+ class TEncodeToMapper: public TEncodeMapperBase {
+ public:
+     // Default state: the base is default-constructed, so the field mask is
+     // zero and Enabled() reports false.
+     TEncodeToMapper() {
+     }
+
+     // Mapper for a destination field; fld defaults to TField::FieldAllMAX.
+     TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
+         : TEncodeMapperBase(flags, fld)
+     {
+     }
+
+     // True when the field mask is non-zero.
+     bool Enabled() const {
+         return FldMask != 0;
+     }
+
+     bool Encode(unsigned char) const;
+ };
+
+ // Writes re-encoded / percent-encoded text to an IOutputStream and exposes
+ // static character-class queries backed by the table returned by Grammar().
+ class TEncoder {
+ public:
+     // flddst defaults to a default-constructed TEncodeToMapper.
+     TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper());
+
+     ui64 ReEncode(const TStringBuf& url);
+     // Convenience overload: wraps (str, len) in a TStringBuf and forwards.
+     ui64 ReEncode(const char* str, size_t len) {
+         const TStringBuf view(str, len);
+         return ReEncode(view);
+     }
+
+ protected:
+     // True when the table entry for c shares at least one bit with flags.
+     static bool IsType(unsigned char c, ui64 flags) {
+         const TCharFlags& entry = GetFlags(c);
+         return (entry.TypeFlags & flags) != 0;
+     }
+
+ public:
+     // Character-class predicates; each tests one ECharFlag mask via IsType().
+     static bool IsDigit(unsigned char c) { return IsType(c, ECFDigit); }
+     static bool IsUpper(unsigned char c) { return IsType(c, ECFUpper); }
+     static bool IsLower(unsigned char c) { return IsType(c, ECFLower); }
+     static bool IsAlpha(unsigned char c) { return IsType(c, ECGAlpha); }
+     static bool IsAlnum(unsigned char c) { return IsType(c, ECGAlnum); }
+     static bool IsUnres(unsigned char c) { return IsType(c, ECGUnres); }
+
+     // Table entry for c, taken from the shared grammar.
+     static const TCharFlags& GetFlags(unsigned char c) {
+         const TGrammar& table = Grammar();
+         return table.Get(c);
+     }
+
+     // process an encoded string, decoding safe chars and encoding unsafe
+     static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
+         TEncoder worker(out, srcfld);
+         worker.ReEncode(val);
+         return out;
+     }
+     // Same as above, with an explicit destination-field mapper.
+     static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) {
+         TEncoder worker(out, srcfld, dstfld);
+         worker.ReEncode(val);
+         return out;
+     }
+
+     // see also UrlUnescape() from string/quote.h
+     // Re-encodes val with FeatureDecodeANY added to flags.
+     static IOutputStream& Decode(
+         IOutputStream& out, const TStringBuf& val, ui64 flags) {
+         return ReEncode(out, val, TEncodeMapper(flags | TFeature::FeatureDecodeANY));
+     }
+
+     // process a raw string or char, encode as needed
+     static IOutputStream& Hex(IOutputStream& out, unsigned char val);
+     // Emits '%' and then whatever Hex() writes for val.
+     static IOutputStream& Encode(IOutputStream& out, unsigned char val) {
+         out << '%';
+         return Hex(out, val);
+     }
+     static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
+     static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);
+
+     static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
+     static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);
+
+     // Shorthands for EncodeField() with the field fixed to TField::FieldAllMAX.
+     static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
+         return EncodeField(out, val, TField::FieldAllMAX);
+     }
+     static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
+         return EncodeField(out, val, TField::FieldAllMAX, flags);
+     }
+
+     // 256-entry table of TCharFlags indexed by unsigned char value.
+     class TGrammar {
+         TCharFlags Map_[256];
+
+     public:
+         TGrammar();
+
+         const TCharFlags& Get(unsigned char ch) const {
+             return Map_[ch];
+         }
+         TCharFlags& GetMutable(unsigned char ch) {
+             return Map_[ch];
+         }
+
+         // Merges val into the entry for ch and returns that entry.
+         TCharFlags& Add(unsigned char ch, const TCharFlags& val) {
+             TCharFlags& slot = GetMutable(ch);
+             return slot.Add(val);
+         }
+
+         // Merges val into every entry of the inclusive range [lo, hi]. The
+         // counter is wider than unsigned char so that hi == 255 terminates.
+         void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) {
+             unsigned cur = lo;
+             while (cur <= hi) {
+                 Add(static_cast<unsigned char>(cur), val);
+                 ++cur;
+             }
+         }
+         void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
+             const TCharFlags flags(type, feat, decmask, encmask);
+             AddRng(lo, hi, flags);
+         }
+
+         // Merges val into the entry of every character contained in set.
+         void Add(const TStringBuf& set, const TCharFlags& val) {
+             const size_t count = set.length();
+             for (size_t pos = 0; pos < count; ++pos) {
+                 Add(static_cast<unsigned char>(set[pos]), val);
+             }
+         }
+         void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
+             const TCharFlags flags(type, feat, decmask, encmask);
+             Add(set, flags);
+         }
+     };
+
+     static const TGrammar& Grammar();
+
+ protected:
+     IOutputStream& Out;
+     const TEncodeMapper FldSrc;
+     const TEncodeToMapper FldDst;
+     ui64 OutFlags;
+     int HexValue;
+
+     // Hex accumulator helpers: HexValue collects one nibble per call.
+     void HexReset() {
+         HexValue = 0;
+     }
+
+     void HexDigit(char c) {
+         HexAdd(c - '0');
+     }
+     void HexUpper(char c) {
+         HexAdd(10 + (c - 'A'));
+     }
+     void HexLower(char c) {
+         HexAdd(10 + (c - 'a'));
+     }
+
+     // Shifts the accumulator left by one nibble and adds val.
+     void HexAdd(int val) {
+         HexValue = (HexValue << 4) + val;
+     }
+
+     // The mapper call takes ch by non-const reference, so it must complete
+     // before ch is handed to Do(); keep these as two separate statements.
+     void DoSym(unsigned char ch) {
+         const int verdict = FldSrc.EncodeSym(ch);
+         Do(ch, verdict);
+     }
+     void DoHex(unsigned char ch) {
+         const int verdict = FldSrc.EncodeHex(ch);
+         Do(ch, verdict);
+     }
+     // Processes the accumulated hex value and clears the accumulator.
+     void DoHex() {
+         DoHex(HexValue);
+         HexReset();
+     }
+     void Do(unsigned char, int);
+ };
+ }
+
+ // Re-export the encoder so that it can be spelled NUri::TEncoder.
+ using TEncoder = NEncode::TEncoder;
+
+}