diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/http/push_parser/http_parser.cpp | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'library/cpp/http/push_parser/http_parser.cpp')
-rw-r--r-- | library/cpp/http/push_parser/http_parser.cpp | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/library/cpp/http/push_parser/http_parser.cpp b/library/cpp/http/push_parser/http_parser.cpp new file mode 100644 index 0000000000..d36618069f --- /dev/null +++ b/library/cpp/http/push_parser/http_parser.cpp @@ -0,0 +1,345 @@ +#include "http_parser.h" + +#include <library/cpp/blockcodecs/stream.h> +#include <library/cpp/blockcodecs/codecs.h> + +#include <util/generic/string.h> +#include <util/generic/yexception.h> +#include <util/stream/mem.h> +#include <util/stream/zlib.h> +#include <util/string/ascii.h> +#include <util/string/split.h> +#include <util/string/strip.h> + +//#define DBGOUT(args) Cout << args << Endl; +#define DBGOUT(args) + +namespace { + const TString BestCodings[] = { + "gzip", + "deflate", + "br", + "x-gzip", + "x-deflate", + "y-lzo", + "y-lzf", + "y-lzq", + "y-bzip2", + "y-lzma", + }; +} + +TString THttpParser::GetBestCompressionScheme() const { + if (AcceptEncodings_.contains("*")) { + return BestCodings[0]; + } + + for (auto& coding : BestCodings) { + if (AcceptEncodings_.contains(coding)) { + return coding; + } + } + + return TString(); +} + +bool THttpParser::FirstLineParser() { + if (Y_UNLIKELY(!ReadLine())) { + return false; + } + + CurrentLine_.swap(FirstLine_); + + try { + TStringBuf s(FirstLine_); + if (MessageType_ == Response) { + // Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + TStringBuf httpVersion, statusCode; + GetNext(s, ' ', httpVersion); + ParseHttpVersion(httpVersion); + GetNext(s, ' ', statusCode); + RetCode_ = FromString<unsigned>(statusCode); + } else { + // Request-Line = Method SP Request-URI SP HTTP-Version CRLF + TStringBuf httpVersion = s.After(' ').After(' '); + ParseHttpVersion(httpVersion); + } + } catch (...) { + throw THttpParseException() << "Cannot parse first line: " << CurrentExceptionMessage() << " First 80 chars of line: " << FirstLine_.substr(0, Min<size_t>(80ull, FirstLine_.size())).Quote(); + } + + return HeadersParser(); +} + +bool THttpParser::HeadersParser() { + while (ReadLine()) { + if (!CurrentLine_) { + //end of headers + DBGOUT("end of headers()"); + ParseHeaderLine(); + + if (HasContentLength_) { + if (ContentLength_ == 0) { + return OnEndParsing(); + } + + if (ContentLength_ < 1000000) { + Content_.reserve(ContentLength_ + 1); + } + } + + return !!ChunkInputState_ ? ChunkedContentParser() : ContentParser(); + } + + if (CurrentLine_[0] == ' ' || CurrentLine_[0] == '\t') { + //continue previous header-line + HeaderLine_ += CurrentLine_; + CurrentLine_.remove(0); + } else { + ParseHeaderLine(); + HeaderLine_.swap(CurrentLine_); + } + } + + Parser_ = &THttpParser::HeadersParser; + return false; +} + +bool THttpParser::ContentParser() { + DBGOUT("Content parsing()"); + if (HasContentLength_) { + size_t rd = Min<size_t>(DataEnd_ - Data_, ContentLength_ - Content_.size()); + Content_.append(Data_, rd); + Data_ += rd; + DBGOUT("Content parsing: " << Content_.Size() << " from " << ContentLength_); + if (Content_.size() == ContentLength_) { + return OnEndParsing(); + } + } else { + if (MessageType_ == Request) { + return OnEndParsing(); //RFC2616 4.4-5 + } else if (Y_UNLIKELY(RetCode() < 200 || RetCode() == 204 || RetCode() == 304)) { + return OnEndParsing(); //RFC2616 4.4-1 (but not checked HEAD request type !) + } + + Content_.append(Data_, DataEnd_); + Data_ = DataEnd_; + } + Parser_ = &THttpParser::ContentParser; + return false; +} + +bool THttpParser::ChunkedContentParser() { + DBGOUT("ReadChunkedContent"); + TChunkInputState& ci = *ChunkInputState_; + + if (Content_.capacity() < static_cast<size_t>(DataEnd_ - Data_)) { + //try reduce memory reallocations + Content_.reserve(DataEnd_ - Data_); + } + + do { + if (!ci.LeftBytes_) { + if (Y_UNLIKELY(!ReadLine())) { //read first chunk size or CRLF from prev chunk or CRLF from last chunk + break; + } + + if (Y_UNLIKELY(ci.ReadLastChunk_)) { + return OnEndParsing(); + } + + if (!CurrentLine_) { + // skip crlf from previous chunk + if (!ReadLine()) { + break; + } + } + Y_ENSURE(CurrentLine_.size(), "NEH: LeftBytes hex number cannot be empty. "); + size_t size = CurrentLine_.find_first_of(" \t;"); + if (size == TString::npos) { + size = CurrentLine_.size(); + } + ci.LeftBytes_ = IntFromString<ui32, 16, char>(CurrentLine_.c_str(), size); + CurrentLine_.remove(0); + if (!ci.LeftBytes_) { //detectect end of context marker - zero-size chunk, need read CRLF after empty chunk + ci.ReadLastChunk_ = true; + if (ReadLine()) { + return OnEndParsing(); + } else { + break; + } + } + } + + size_t rd = Min<size_t>(DataEnd_ - Data_, ci.LeftBytes_); + Content_.append(Data_, rd); + Data_ += rd; + ci.LeftBytes_ -= rd; + } while (Data_ != DataEnd_); + + Parser_ = &THttpParser::ChunkedContentParser; + return false; +} + +bool THttpParser::OnEndParsing() { + Parser_ = &THttpParser::OnEndParsing; + ExtraDataSize_ = DataEnd_ - Data_; + return true; +} + +//continue read to CurrentLine_ +bool THttpParser::ReadLine() { + TStringBuf in(Data_, DataEnd_); + size_t endl = in.find('\n'); + + if (Y_UNLIKELY(endl == TStringBuf::npos)) { + //input line not completed + CurrentLine_.append(Data_, DataEnd_); + return false; + } + + CurrentLine_.append(in.data(), endl); + if (Y_LIKELY(CurrentLine_.size())) { + //remove '\r' from tail + size_t withoutCR = CurrentLine_.size() - 1; + if (CurrentLine_[withoutCR] == '\r') { + CurrentLine_.remove(withoutCR); + } + } + + //Cout << "ReadLine:" << CurrentLine_ << Endl; + Data_ += endl + 1; + return true; +} + +void THttpParser::ParseHttpVersion(TStringBuf httpVersion) { + if (!httpVersion.StartsWith("HTTP/", 5)) { + throw yexception() << "expect 'HTTP/'"; + } + httpVersion.Skip(5); + { + TStringBuf major, minor; + Split(httpVersion, '.', major, minor); + HttpVersion_.Major = FromString<unsigned>(major); + HttpVersion_.Minor = FromString<unsigned>(minor); + if (Y_LIKELY(HttpVersion_.Major > 1 || HttpVersion_.Minor > 0)) { + // since HTTP/1.1 Keep-Alive is default behaviour + KeepAlive_ = true; + } + } +} + +void THttpParser::ParseHeaderLine() { + if (!!HeaderLine_) { + if (CollectHeaders_) { + THttpInputHeader hdr(HeaderLine_); + + Headers_.AddHeader(hdr); + + ApplyHeaderLine(hdr.Name(), hdr.Value()); + } else { + //some dirty optimization (avoid reallocation new strings) + size_t pos = HeaderLine_.find(':'); + + if (pos == TString::npos) { + ythrow THttpParseException() << "can not parse http header(" << HeaderLine_.Quote() << ")"; + } + + TStringBuf name(StripString(TStringBuf(HeaderLine_.begin(), HeaderLine_.begin() + pos))); + TStringBuf val(StripString(TStringBuf(HeaderLine_.begin() + pos + 1, HeaderLine_.end()))); + ApplyHeaderLine(name, val); + } + HeaderLine_.remove(0); + } +} + +void THttpParser::OnEof() { + if (Parser_ == &THttpParser::ContentParser && !HasContentLength_ && !ChunkInputState_) { + return; //end of content determined by end of input + } + throw THttpException() << TStringBuf("incompleted http response"); +} + +bool THttpParser::DecodeContent() { + if (!ContentEncoding_ || ContentEncoding_ == "identity" || ContentEncoding_ == "none") { + DecodedContent_ = Content_; + return false; + } + + TMemoryInput in(Content_.data(), Content_.size()); + if (ContentEncoding_ == "gzip") { + auto decompressor = TZLibDecompress(&in, ZLib::GZip); + if (!GzipAllowMultipleStreams_) { + decompressor.SetAllowMultipleStreams(false); + } + DecodedContent_ = decompressor.ReadAll(); + } else if (ContentEncoding_ == "deflate") { + + //https://tools.ietf.org/html/rfc1950 + bool definitelyNoZlibHeader; + if (Content_.size() < 2) { + definitelyNoZlibHeader = true; + } else { + const ui16 cmf = static_cast<ui8>(Content_[0]); + const ui16 flg = static_cast<ui8>(Content_[1]); + definitelyNoZlibHeader = ((cmf << 8) | flg) % 31 != 0; + } + + try { + DecodedContent_ = TZLibDecompress(&in, definitelyNoZlibHeader ? ZLib::Raw : ZLib::ZLib).ReadAll(); + } + catch(...) { + if (definitelyNoZlibHeader) { + throw; + } + TMemoryInput retryInput(Content_.data(), Content_.size()); + DecodedContent_ = TZLibDecompress(&retryInput, ZLib::Raw).ReadAll(); + } + } else if (ContentEncoding_.StartsWith("z-")) { + // opposite for library/cpp/http/io/stream.h + const NBlockCodecs::ICodec* codec = nullptr; + try { + const TStringBuf codecName = TStringBuf(ContentEncoding_).SubStr(2); + if (codecName.StartsWith("zstd06") || codecName.StartsWith("zstd08")) { + ythrow NBlockCodecs::TNotFound() << codecName; + } + codec = NBlockCodecs::Codec(codecName); + } catch(const NBlockCodecs::TNotFound& exc) { + throw THttpParseException() << "Unsupported content-encoding method: " << exc.AsStrBuf(); + } + NBlockCodecs::TDecodedInput decoder(&in, codec); + DecodedContent_ = decoder.ReadAll(); + } else { + throw THttpParseException() << "Unsupported content-encoding method: " << ContentEncoding_; + } + return true; +} + +void THttpParser::ApplyHeaderLine(const TStringBuf& name, const TStringBuf& val) { + if (AsciiEqualsIgnoreCase(name, TStringBuf("connection"))) { + KeepAlive_ = AsciiEqualsIgnoreCase(val, TStringBuf("keep-alive")); + } else if (AsciiEqualsIgnoreCase(name, TStringBuf("content-length"))) { + Y_ENSURE(val.size(), "NEH: Content-Length cannot be empty string. "); + ContentLength_ = FromString<ui64>(val); + HasContentLength_ = true; + } else if (AsciiEqualsIgnoreCase(name, TStringBuf("transfer-encoding"))) { + if (AsciiEqualsIgnoreCase(val, TStringBuf("chunked"))) { + ChunkInputState_ = new TChunkInputState(); + } + } else if (AsciiEqualsIgnoreCase(name, TStringBuf("accept-encoding"))) { + TStringBuf encodings(val); + while (encodings.size()) { + TStringBuf enc = encodings.NextTok(',').After(' ').Before(' '); + if (!enc) { + continue; + } + TString s(enc); + s.to_lower(); + AcceptEncodings_.insert(s); + } + } else if (AsciiEqualsIgnoreCase(name, TStringBuf("content-encoding"))) { + TString s(val); + s.to_lower(); + ContentEncoding_ = s; + } +} |