diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/uri/encode.cpp | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/uri/encode.cpp')
-rw-r--r-- | library/cpp/uri/encode.cpp | 386 |
1 files changed, 193 insertions, 193 deletions
diff --git a/library/cpp/uri/encode.cpp b/library/cpp/uri/encode.cpp index 34790c2d21..584fb1bac9 100644 --- a/library/cpp/uri/encode.cpp +++ b/library/cpp/uri/encode.cpp @@ -1,9 +1,9 @@ #include "encode.h" -#include <util/generic/singleton.h> +#include <util/generic/singleton.h> namespace NUri { - namespace NEncode { + namespace NEncode { // http://tools.ietf.org/html/rfc3986#section-2.2 #define GENDELIMS0 ":/?#[]@" #define SUBDELIMS0 "!$&'()*+,;=" @@ -24,197 +24,197 @@ namespace NUri { // rest allowed in qry, frag #define SUBDELIMS2 "!$'()*," - const TEncoder::TGrammar& TEncoder::Grammar() { - return *Singleton<TEncoder::TGrammar>(); - } - - // initialize the grammar map - TEncoder::TGrammar::TGrammar() { - // first set up unreserved characters safe in any field - const ui64 featUnres = TFeature::FeatureDecodeUnreserved; - AddRng('0', '9', ECFDigit, featUnres); - AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); - AddRng('a', 'z', ECFLower, featUnres); - Add(UNRESERVED, ECFUnres, featUnres); - - // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: - // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') - Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); - - // now field-specific subsets of reserved characters (gen-delims + sub-delims) - const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; - - Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); - Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); - - Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); - Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); - - // control chars - AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); - Add(0x7f, TFeature::FeatureEncodeCntrl); - - // '%' starts a percent-encoded sequence - Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); - - // extended ASCII - AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); - - // extended delims - Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); - - // add characters with other features - Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); - Add("'\"\\", TFeature::FeatureEncodeForSQL); - - GetMutable(':').EncodeFld |= TField::FlagUser; - GetMutable('?').EncodeFld |= TField::FlagPath; - GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; - GetMutable('&').EncodeFld |= TField::FlagQuery; - GetMutable('+').EncodeFld |= TField::FlagQuery; - } - - // should we decode an encoded character - bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const { - const ui64 myflags = flags & FeatFlags; - if (myflags & TFeature::FeaturesEncode) - return false; - if (myflags & TFeature::FeaturesDecode) - return true; - return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); - } - - const int dD = 'a' - 'A'; - - int TEncodeMapper::EncodeSym(unsigned char& ch) const { - const TCharFlags& chflags = TEncoder::GetFlags(ch); - const ui64 flags = Flags & chflags.FeatFlags; - - if (flags & TFeature::FeatureToLower) - ch += dD; - - if (Q_DecodeAny) - return -1; - - if (flags & TFeature::FeaturesEncode) - return 1; - - if (' ' == ch) { - if (Q_EncodeSpcAsPlus) - ch = '+'; - return 0; - } - - return 0; - } - - int TEncodeMapper::EncodeHex(unsigned char& ch) const { - const TCharFlags& chflags = TEncoder::GetFlags(ch); - const ui64 flags = Flags & chflags.FeatFlags; - - if (flags & TFeature::FeatureToLower) - ch += dD; - - if (Q_DecodeAny) - return -1; - - if (chflags.IsDecode(FldMask, Flags)) - return 0; - - if (' ' == ch) { - if (!Q_EncodeSpcAsPlus) - return 1; - ch = '+'; - return 0; - } - + const TEncoder::TGrammar& TEncoder::Grammar() { + return *Singleton<TEncoder::TGrammar>(); + } + + // initialize the grammar map + TEncoder::TGrammar::TGrammar() { + // first set up unreserved characters safe in any field + const ui64 featUnres = TFeature::FeatureDecodeUnreserved; + AddRng('0', '9', ECFDigit, featUnres); + AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); + AddRng('a', 'z', ECFLower, featUnres); + Add(UNRESERVED, ECFUnres, featUnres); + + // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: + // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') + Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); + + // now field-specific subsets of reserved characters (gen-delims + sub-delims) + const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; + + Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); + Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); + + Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); + Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); + + // control chars + AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); + Add(0x7f, TFeature::FeatureEncodeCntrl); + + // '%' starts a percent-encoded sequence + Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); + + // extended ASCII + AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); + + // extended delims + Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); + + // add characters with other features + Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); + Add("'\"\\", TFeature::FeatureEncodeForSQL); + + GetMutable(':').EncodeFld |= TField::FlagUser; + GetMutable('?').EncodeFld |= TField::FlagPath; + GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; + GetMutable('&').EncodeFld |= TField::FlagQuery; + GetMutable('+').EncodeFld |= TField::FlagQuery; + } + + // should we decode an encoded character + bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const { + const ui64 myflags = flags & FeatFlags; + if (myflags & TFeature::FeaturesEncode) + return false; + if (myflags & TFeature::FeaturesDecode) + return true; + return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); + } + + const int dD = 'a' - 'A'; + + int TEncodeMapper::EncodeSym(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (flags & TFeature::FeaturesEncode) + return 1; + + if (' ' == ch) { + if (Q_EncodeSpcAsPlus) + ch = '+'; + return 0; + } + + return 0; + } + + int TEncodeMapper::EncodeHex(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (chflags.IsDecode(FldMask, Flags)) + return 0; + + if (' ' == ch) { + if (!Q_EncodeSpcAsPlus) + return 1; + ch = '+'; + return 0; + } + return 1; - } - - bool TEncodeToMapper::Encode(unsigned char ch) const { - if (Q_DecodeAny) - return false; - - const TCharFlags& chflags = TEncoder::GetFlags(ch); - if (FldMask & chflags.EncodeFld) - return true; - - const ui64 flags = Flags & chflags.FeatFlags; - return (flags & TFeature::FeaturesEncode); - } - - TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) - : Out(out) - , FldSrc(fldsrc) - , FldDst(flddst) - , OutFlags(0) - , HexValue(0) - { - } - - IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { - static const char sHexCodes[] = "0123456789ABCDEF"; - return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; - } - - IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { - for (size_t i = 0; i != val.length(); ++i) - Encode(out, val[i]); - return out; - } - - IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) { - for (size_t i = 0; i != val.length(); ++i) { - const char c = val[i]; - if (IsAlnum(c)) - out << c; - else - Encode(out, c); - } - return out; - } - - IOutputStream& TEncoder::EncodeField( - IOutputStream& out, const TStringBuf& val, TField::EField fld) { - const ui32 fldmask = ui32(1) << fld; - for (size_t i = 0; i != val.length(); ++i) { - const char ch = val[i]; - if (GetFlags(ch).IsAllowed(fldmask)) - out << ch; - else - Encode(out, ch); - } - return out; - } - - IOutputStream& TEncoder::EncodeField( - IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) { - const ui32 fldmask = ui32(1) << fld; - for (size_t i = 0; i != val.length(); ++i) { - const char ch = val[i]; - if (GetFlags(ch).IsDecode(fldmask, flags)) - out << ch; - else - Encode(out, ch); - } - return out; - } - - void TEncoder::Do(unsigned char ch, int res) { - OutFlags |= GetFlags(ch).FeatFlags; - - bool escapepct = false; - if (0 < res) // definitely encode - escapepct = FldDst.Enabled(); - else if (0 != res || !FldDst.Enabled() || !FldDst.Encode(ch)) { - Out << ch; - return; - } - - Out << '%'; - if (escapepct) - Out.Write("25", 2); // '%' - Hex(Out, ch); - } + } + + bool TEncodeToMapper::Encode(unsigned char ch) const { + if (Q_DecodeAny) + return false; + + const TCharFlags& chflags = TEncoder::GetFlags(ch); + if (FldMask & chflags.EncodeFld) + return true; + + const ui64 flags = Flags & chflags.FeatFlags; + return (flags & TFeature::FeaturesEncode); + } + + TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) + : Out(out) + , FldSrc(fldsrc) + , FldDst(flddst) + , OutFlags(0) + , HexValue(0) + { + } + + IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { + static const char sHexCodes[] = "0123456789ABCDEF"; + return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; + } + + IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) + Encode(out, val[i]); + return out; + } + + IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) { + const char c = val[i]; + if (IsAlnum(c)) + out << c; + else + Encode(out, c); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsAllowed(fldmask)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsDecode(fldmask, flags)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + void TEncoder::Do(unsigned char ch, int res) { + OutFlags |= GetFlags(ch).FeatFlags; + + bool escapepct = false; + if (0 < res) // definitely encode + escapepct = FldDst.Enabled(); + else if (0 != res || !FldDst.Enabled() || !FldDst.Encode(ch)) { + Out << ch; + return; + } + + Out << '%'; + if (escapepct) + Out.Write("25", 2); // '%' + Hex(Out, ch); + } } } |