diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/uri/encode.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/uri/encode.cpp')
-rw-r--r-- | library/cpp/uri/encode.cpp | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/library/cpp/uri/encode.cpp b/library/cpp/uri/encode.cpp new file mode 100644 index 0000000000..9eab1535bc --- /dev/null +++ b/library/cpp/uri/encode.cpp @@ -0,0 +1,221 @@ +#include "encode.h" + +#include <util/string/cast.h> +#include <util/generic/singleton.h> + +namespace NUri { + namespace NEncode { +// http://tools.ietf.org/html/rfc3986#section-2.2 +#define GENDELIMS0 ":/?#[]@" +#define SUBDELIMS0 "!$&'()*+,;=" +// http://tools.ietf.org/html/rfc3986#section-2.3 +#define UNRESERVED "-._~" + +// now find subsets which can sometimes be decoded + +// remove '#' which can't ever be decoded +// don't mark anything allowed for pass (pass is completely encoded) +// safe in path, qry, frag +#define GENDELIMS1 ":@" +// allowed in qry, frag +#define GENDELIMS2 "/?" + +// qry-unsafe chars +#define SUBDELIMS1 "&+=;" +// rest allowed in qry, frag +#define SUBDELIMS2 "!$'()*," + + const TEncoder::TGrammar& TEncoder::Grammar() { + return *Singleton<TEncoder::TGrammar>(); + } + + // initialize the grammar map + TEncoder::TGrammar::TGrammar() { + // first set up unreserved characters safe in any field + const ui64 featUnres = TFeature::FeatureDecodeUnreserved; + AddRng('0', '9', ECFDigit, featUnres); + AddRng('A', 'Z', ECFUpper, featUnres | TFeature::FeatureToLower); + AddRng('a', 'z', ECFLower, featUnres); + Add(UNRESERVED, ECFUnres, featUnres); + + // XXX: standard "safe" set used previously "-_.!~*();/:@$,", with comment: + // alnum + reserved + mark + ( '[', ']') - ('=' '+' '&' '\'' '"' '\\' '?') + Add("!*();/:@$,", ECFStdrd, TFeature::FeatureDecodeStandardExtra); + + // now field-specific subsets of reserved characters (gen-delims + sub-delims) + const ui64 featSafe = TFeature::FeatureDecodeFieldAllowed; + + Add(GENDELIMS1, 0, featSafe, TField::FlagPath | TField::FlagQuery | TField::FlagFrag); + Add(GENDELIMS2, 0, featSafe, TField::FlagQuery | TField::FlagFrag); + + Add(SUBDELIMS1, 0, featSafe, TField::FlagUser); + Add(SUBDELIMS2, 0, featSafe, TField::FlagUser | TField::FlagQuery | TField::FlagFrag); + + // control chars + AddRng(0x00, 0x20, TFeature::FeatureEncodeCntrl); + Add(0x7f, TFeature::FeatureEncodeCntrl); + + // '%' starts a percent-encoded sequence + Add('%', TFeature::FeatureDecodeANY | TFeature::FeatureEncodePercent); + + // extended ASCII + AddRng(128, 255, TFeature::FeatureEncodeExtendedASCII | TFeature::FeatureDecodeExtendedASCII); + + // extended delims + Add("\"<>[\\]^`{|}", TFeature::FeatureEncodeExtendedDelim | TFeature::FeatureDecodeExtendedDelim); + + // add characters with other features + Add(' ', TFeature::FeatureEncodeSpace | TFeature::FeatureEncodeSpaceAsPlus); + Add("'\"\\", TFeature::FeatureEncodeForSQL); + + GetMutable(':').EncodeFld |= TField::FlagUser; + GetMutable('?').EncodeFld |= TField::FlagPath; + GetMutable('#').EncodeFld |= TField::FlagPath | TField::FlagQuery; + GetMutable('&').EncodeFld |= TField::FlagQuery; + GetMutable('+').EncodeFld |= TField::FlagQuery; + } + + // should we decode an encoded character + bool TCharFlags::IsDecode(ui32 fldmask, ui64 flags) const { + const ui64 myflags = flags & FeatFlags; + if (myflags & TFeature::FeaturesEncode) + return false; + if (myflags & TFeature::FeaturesDecode) + return true; + return (fldmask & DecodeFld) && (flags & TFeature::FeatureDecodeFieldAllowed); + } + + const int dD = 'a' - 'A'; + + int TEncodeMapper::EncodeSym(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (flags & TFeature::FeaturesEncode) + return 1; + + if (' ' == ch) { + if (Q_EncodeSpcAsPlus) + ch = '+'; + return 0; + } + + return 0; + } + + int TEncodeMapper::EncodeHex(unsigned char& ch) const { + const TCharFlags& chflags = TEncoder::GetFlags(ch); + const ui64 flags = Flags & chflags.FeatFlags; + + if (flags & TFeature::FeatureToLower) + ch += dD; + + if (Q_DecodeAny) + return -1; + + if (chflags.IsDecode(FldMask, Flags)) + return 0; + + if (' ' == ch) { + if (!Q_EncodeSpcAsPlus) + return 1; + ch = '+'; + return 0; + } + + return 1; + } + + bool TEncodeToMapper::Encode(unsigned char ch) const { + if (Q_DecodeAny) + return false; + + const TCharFlags& chflags = TEncoder::GetFlags(ch); + if (FldMask & chflags.EncodeFld) + return true; + + const ui64 flags = Flags & chflags.FeatFlags; + return (flags & TFeature::FeaturesEncode); + } + + TEncoder::TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst) + : Out(out) + , FldSrc(fldsrc) + , FldDst(flddst) + , OutFlags(0) + , HexValue(0) + { + } + + IOutputStream& TEncoder::Hex(IOutputStream& out, unsigned char val) { + static const char sHexCodes[] = "0123456789ABCDEF"; + return out << sHexCodes[(val >> 4) & 0xF] << sHexCodes[val & 0xF]; + } + + IOutputStream& TEncoder::EncodeAll(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) + Encode(out, val[i]); + return out; + } + + IOutputStream& TEncoder::EncodeNotAlnum(IOutputStream& out, const TStringBuf& val) { + for (size_t i = 0; i != val.length(); ++i) { + const char c = val[i]; + if (IsAlnum(c)) + out << c; + else + Encode(out, c); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsAllowed(fldmask)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + IOutputStream& TEncoder::EncodeField( + IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags) { + const ui32 fldmask = ui32(1) << fld; + for (size_t i = 0; i != val.length(); ++i) { + const char ch = val[i]; + if (GetFlags(ch).IsDecode(fldmask, flags)) + out << ch; + else + Encode(out, ch); + } + return out; + } + + void TEncoder::Do(unsigned char ch, int res) { + OutFlags |= GetFlags(ch).FeatFlags; + + bool escapepct = false; + if (0 < res) // definitely encode + escapepct = FldDst.Enabled(); + else if (0 != res || !FldDst.Enabled() || !FldDst.Encode(ch)) { + Out << ch; + return; + } + + Out << '%'; + if (escapepct) + Out.Write("25", 2); // '%' + Hex(Out, ch); + } + } +} |