aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/http/push_parser/http_parser.cpp
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/http/push_parser/http_parser.cpp
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/http/push_parser/http_parser.cpp')
-rw-r--r--library/cpp/http/push_parser/http_parser.cpp345
1 files changed, 345 insertions, 0 deletions
diff --git a/library/cpp/http/push_parser/http_parser.cpp b/library/cpp/http/push_parser/http_parser.cpp
new file mode 100644
index 0000000000..d36618069f
--- /dev/null
+++ b/library/cpp/http/push_parser/http_parser.cpp
@@ -0,0 +1,345 @@
+#include "http_parser.h"
+
+#include <library/cpp/blockcodecs/stream.h>
+#include <library/cpp/blockcodecs/codecs.h>
+
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/stream/mem.h>
+#include <util/stream/zlib.h>
+#include <util/string/ascii.h>
+#include <util/string/split.h>
+#include <util/string/strip.h>
+
+//#define DBGOUT(args) Cout << args << Endl;
+#define DBGOUT(args)
+
+namespace {
+ const TString BestCodings[] = {
+ "gzip",
+ "deflate",
+ "br",
+ "x-gzip",
+ "x-deflate",
+ "y-lzo",
+ "y-lzf",
+ "y-lzq",
+ "y-bzip2",
+ "y-lzma",
+ };
+}
+
+TString THttpParser::GetBestCompressionScheme() const {
+ if (AcceptEncodings_.contains("*")) {
+ return BestCodings[0];
+ }
+
+ for (auto& coding : BestCodings) {
+ if (AcceptEncodings_.contains(coding)) {
+ return coding;
+ }
+ }
+
+ return TString();
+}
+
+bool THttpParser::FirstLineParser() {
+ if (Y_UNLIKELY(!ReadLine())) {
+ return false;
+ }
+
+ CurrentLine_.swap(FirstLine_);
+
+ try {
+ TStringBuf s(FirstLine_);
+ if (MessageType_ == Response) {
+ // Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF
+ TStringBuf httpVersion, statusCode;
+ GetNext(s, ' ', httpVersion);
+ ParseHttpVersion(httpVersion);
+ GetNext(s, ' ', statusCode);
+ RetCode_ = FromString<unsigned>(statusCode);
+ } else {
+ // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
+ TStringBuf httpVersion = s.After(' ').After(' ');
+ ParseHttpVersion(httpVersion);
+ }
+ } catch (...) {
+ throw THttpParseException() << "Cannot parse first line: " << CurrentExceptionMessage() << " First 80 chars of line: " << FirstLine_.substr(0, Min<size_t>(80ull, FirstLine_.size())).Quote();
+ }
+
+ return HeadersParser();
+}
+
+bool THttpParser::HeadersParser() {
+ while (ReadLine()) {
+ if (!CurrentLine_) {
+ //end of headers
+ DBGOUT("end of headers()");
+ ParseHeaderLine();
+
+ if (HasContentLength_) {
+ if (ContentLength_ == 0) {
+ return OnEndParsing();
+ }
+
+ if (ContentLength_ < 1000000) {
+ Content_.reserve(ContentLength_ + 1);
+ }
+ }
+
+ return !!ChunkInputState_ ? ChunkedContentParser() : ContentParser();
+ }
+
+ if (CurrentLine_[0] == ' ' || CurrentLine_[0] == '\t') {
+ //continue previous header-line
+ HeaderLine_ += CurrentLine_;
+ CurrentLine_.remove(0);
+ } else {
+ ParseHeaderLine();
+ HeaderLine_.swap(CurrentLine_);
+ }
+ }
+
+ Parser_ = &THttpParser::HeadersParser;
+ return false;
+}
+
+bool THttpParser::ContentParser() {
+ DBGOUT("Content parsing()");
+ if (HasContentLength_) {
+ size_t rd = Min<size_t>(DataEnd_ - Data_, ContentLength_ - Content_.size());
+ Content_.append(Data_, rd);
+ Data_ += rd;
+ DBGOUT("Content parsing: " << Content_.Size() << " from " << ContentLength_);
+ if (Content_.size() == ContentLength_) {
+ return OnEndParsing();
+ }
+ } else {
+ if (MessageType_ == Request) {
+ return OnEndParsing(); //RFC2616 4.4-5
+ } else if (Y_UNLIKELY(RetCode() < 200 || RetCode() == 204 || RetCode() == 304)) {
+ return OnEndParsing(); //RFC2616 4.4-1 (but not checked HEAD request type !)
+ }
+
+ Content_.append(Data_, DataEnd_);
+ Data_ = DataEnd_;
+ }
+ Parser_ = &THttpParser::ContentParser;
+ return false;
+}
+
+bool THttpParser::ChunkedContentParser() {
+ DBGOUT("ReadChunkedContent");
+ TChunkInputState& ci = *ChunkInputState_;
+
+ if (Content_.capacity() < static_cast<size_t>(DataEnd_ - Data_)) {
+ //try reduce memory reallocations
+ Content_.reserve(DataEnd_ - Data_);
+ }
+
+ do {
+ if (!ci.LeftBytes_) {
+ if (Y_UNLIKELY(!ReadLine())) { //read first chunk size or CRLF from prev chunk or CRLF from last chunk
+ break;
+ }
+
+ if (Y_UNLIKELY(ci.ReadLastChunk_)) {
+ return OnEndParsing();
+ }
+
+ if (!CurrentLine_) {
+ // skip crlf from previous chunk
+ if (!ReadLine()) {
+ break;
+ }
+ }
+ Y_ENSURE(CurrentLine_.size(), "NEH: LeftBytes hex number cannot be empty. ");
+ size_t size = CurrentLine_.find_first_of(" \t;");
+ if (size == TString::npos) {
+ size = CurrentLine_.size();
+ }
+ ci.LeftBytes_ = IntFromString<ui32, 16, char>(CurrentLine_.c_str(), size);
+ CurrentLine_.remove(0);
+ if (!ci.LeftBytes_) { //detectect end of context marker - zero-size chunk, need read CRLF after empty chunk
+ ci.ReadLastChunk_ = true;
+ if (ReadLine()) {
+ return OnEndParsing();
+ } else {
+ break;
+ }
+ }
+ }
+
+ size_t rd = Min<size_t>(DataEnd_ - Data_, ci.LeftBytes_);
+ Content_.append(Data_, rd);
+ Data_ += rd;
+ ci.LeftBytes_ -= rd;
+ } while (Data_ != DataEnd_);
+
+ Parser_ = &THttpParser::ChunkedContentParser;
+ return false;
+}
+
+bool THttpParser::OnEndParsing() {
+ Parser_ = &THttpParser::OnEndParsing;
+ ExtraDataSize_ = DataEnd_ - Data_;
+ return true;
+}
+
+//continue read to CurrentLine_
+bool THttpParser::ReadLine() {
+ TStringBuf in(Data_, DataEnd_);
+ size_t endl = in.find('\n');
+
+ if (Y_UNLIKELY(endl == TStringBuf::npos)) {
+ //input line not completed
+ CurrentLine_.append(Data_, DataEnd_);
+ return false;
+ }
+
+ CurrentLine_.append(in.data(), endl);
+ if (Y_LIKELY(CurrentLine_.size())) {
+ //remove '\r' from tail
+ size_t withoutCR = CurrentLine_.size() - 1;
+ if (CurrentLine_[withoutCR] == '\r') {
+ CurrentLine_.remove(withoutCR);
+ }
+ }
+
+ //Cout << "ReadLine:" << CurrentLine_ << Endl;
+ Data_ += endl + 1;
+ return true;
+}
+
+void THttpParser::ParseHttpVersion(TStringBuf httpVersion) {
+ if (!httpVersion.StartsWith("HTTP/", 5)) {
+ throw yexception() << "expect 'HTTP/'";
+ }
+ httpVersion.Skip(5);
+ {
+ TStringBuf major, minor;
+ Split(httpVersion, '.', major, minor);
+ HttpVersion_.Major = FromString<unsigned>(major);
+ HttpVersion_.Minor = FromString<unsigned>(minor);
+ if (Y_LIKELY(HttpVersion_.Major > 1 || HttpVersion_.Minor > 0)) {
+ // since HTTP/1.1 Keep-Alive is default behaviour
+ KeepAlive_ = true;
+ }
+ }
+}
+
+void THttpParser::ParseHeaderLine() {
+ if (!!HeaderLine_) {
+ if (CollectHeaders_) {
+ THttpInputHeader hdr(HeaderLine_);
+
+ Headers_.AddHeader(hdr);
+
+ ApplyHeaderLine(hdr.Name(), hdr.Value());
+ } else {
+ //some dirty optimization (avoid reallocation new strings)
+ size_t pos = HeaderLine_.find(':');
+
+ if (pos == TString::npos) {
+ ythrow THttpParseException() << "can not parse http header(" << HeaderLine_.Quote() << ")";
+ }
+
+ TStringBuf name(StripString(TStringBuf(HeaderLine_.begin(), HeaderLine_.begin() + pos)));
+ TStringBuf val(StripString(TStringBuf(HeaderLine_.begin() + pos + 1, HeaderLine_.end())));
+ ApplyHeaderLine(name, val);
+ }
+ HeaderLine_.remove(0);
+ }
+}
+
+void THttpParser::OnEof() {
+ if (Parser_ == &THttpParser::ContentParser && !HasContentLength_ && !ChunkInputState_) {
+ return; //end of content determined by end of input
+ }
+ throw THttpException() << TStringBuf("incompleted http response");
+}
+
+bool THttpParser::DecodeContent() {
+ if (!ContentEncoding_ || ContentEncoding_ == "identity" || ContentEncoding_ == "none") {
+ DecodedContent_ = Content_;
+ return false;
+ }
+
+ TMemoryInput in(Content_.data(), Content_.size());
+ if (ContentEncoding_ == "gzip") {
+ auto decompressor = TZLibDecompress(&in, ZLib::GZip);
+ if (!GzipAllowMultipleStreams_) {
+ decompressor.SetAllowMultipleStreams(false);
+ }
+ DecodedContent_ = decompressor.ReadAll();
+ } else if (ContentEncoding_ == "deflate") {
+
+ //https://tools.ietf.org/html/rfc1950
+ bool definitelyNoZlibHeader;
+ if (Content_.size() < 2) {
+ definitelyNoZlibHeader = true;
+ } else {
+ const ui16 cmf = static_cast<ui8>(Content_[0]);
+ const ui16 flg = static_cast<ui8>(Content_[1]);
+ definitelyNoZlibHeader = ((cmf << 8) | flg) % 31 != 0;
+ }
+
+ try {
+ DecodedContent_ = TZLibDecompress(&in, definitelyNoZlibHeader ? ZLib::Raw : ZLib::ZLib).ReadAll();
+ }
+ catch(...) {
+ if (definitelyNoZlibHeader) {
+ throw;
+ }
+ TMemoryInput retryInput(Content_.data(), Content_.size());
+ DecodedContent_ = TZLibDecompress(&retryInput, ZLib::Raw).ReadAll();
+ }
+ } else if (ContentEncoding_.StartsWith("z-")) {
+ // opposite for library/cpp/http/io/stream.h
+ const NBlockCodecs::ICodec* codec = nullptr;
+ try {
+ const TStringBuf codecName = TStringBuf(ContentEncoding_).SubStr(2);
+ if (codecName.StartsWith("zstd06") || codecName.StartsWith("zstd08")) {
+ ythrow NBlockCodecs::TNotFound() << codecName;
+ }
+ codec = NBlockCodecs::Codec(codecName);
+ } catch(const NBlockCodecs::TNotFound& exc) {
+ throw THttpParseException() << "Unsupported content-encoding method: " << exc.AsStrBuf();
+ }
+ NBlockCodecs::TDecodedInput decoder(&in, codec);
+ DecodedContent_ = decoder.ReadAll();
+ } else {
+ throw THttpParseException() << "Unsupported content-encoding method: " << ContentEncoding_;
+ }
+ return true;
+}
+
+void THttpParser::ApplyHeaderLine(const TStringBuf& name, const TStringBuf& val) {
+ if (AsciiEqualsIgnoreCase(name, TStringBuf("connection"))) {
+ KeepAlive_ = AsciiEqualsIgnoreCase(val, TStringBuf("keep-alive"));
+ } else if (AsciiEqualsIgnoreCase(name, TStringBuf("content-length"))) {
+ Y_ENSURE(val.size(), "NEH: Content-Length cannot be empty string. ");
+ ContentLength_ = FromString<ui64>(val);
+ HasContentLength_ = true;
+ } else if (AsciiEqualsIgnoreCase(name, TStringBuf("transfer-encoding"))) {
+ if (AsciiEqualsIgnoreCase(val, TStringBuf("chunked"))) {
+ ChunkInputState_ = new TChunkInputState();
+ }
+ } else if (AsciiEqualsIgnoreCase(name, TStringBuf("accept-encoding"))) {
+ TStringBuf encodings(val);
+ while (encodings.size()) {
+ TStringBuf enc = encodings.NextTok(',').After(' ').Before(' ');
+ if (!enc) {
+ continue;
+ }
+ TString s(enc);
+ s.to_lower();
+ AcceptEncodings_.insert(s);
+ }
+ } else if (AsciiEqualsIgnoreCase(name, TStringBuf("content-encoding"))) {
+ TString s(val);
+ s.to_lower();
+ ContentEncoding_ = s;
+ }
+}