diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/http/fetch/httpparser.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/http/fetch/httpparser.h')
-rw-r--r-- | library/cpp/http/fetch/httpparser.h | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h new file mode 100644 index 0000000000..769828e4ae --- /dev/null +++ b/library/cpp/http/fetch/httpparser.h @@ -0,0 +1,372 @@ +#pragma once + +#include "httpfsm.h" +#include "httpheader.h" + +#include <library/cpp/mime/types/mime.h> +#include <util/system/yassert.h> +#include <library/cpp/http/misc/httpcodes.h> + +template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20> +struct TFakeCheck { + bool Check(THttpHeader* /*header*/) { + return false; + } + void CheckDocPart(void* /*buf*/, size_t /*len*/, THttpHeader* /*header*/) { + } //for every part of DocumentBody will be called + void CheckEndDoc(THttpHeader* /*header*/) { + } + size_t GetMaxHeaderSize() { + return headermax; + } + size_t GetMaxBodySize(THttpHeader*) { + return bodymax; + } +}; + +class THttpParserBase { +public: + enum States { + hp_error, + hp_eof, + hp_in_header, + hp_read_alive, + hp_read_closed, + hp_begin_chunk_header, + hp_chunk_header, + hp_read_chunk + }; + + States GetState() { + return State; + } + + void setAssumeConnectionClosed(int value) { + AssumeConnectionClosed = value; + } + + THttpHeader* GetHttpHeader() const { + return Header; + } + +protected: + int CheckHeaders() { + if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) { + Header->content_length = 0; + Header->transfer_chunked = 0; + } + if (Header->transfer_chunked < -1) { + Header->error = HTTP_BAD_ENCODING; + return 1; + } else if (Header->transfer_chunked == -1) { + Header->transfer_chunked = 0; + } + if (!Header->transfer_chunked && Header->content_length < -1) { + Header->error = HTTP_BAD_CONTENT_LENGTH; + return 1; + } + if (Header->http_status == HTTP_OK) { + if (Header->compression_method != HTTP_COMPRESSION_UNSET && + Header->compression_method != HTTP_COMPRESSION_IDENTITY && + Header->compression_method != HTTP_COMPRESSION_GZIP && + Header->compression_method != HTTP_COMPRESSION_DEFLATE) + { + Header->error = HTTP_BAD_CONTENT_ENCODING; + return 1; + } + } + if (Header->connection_closed == -1) + Header->connection_closed = (Header->http_minor == 0 || + AssumeConnectionClosed); + if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) { + Header->error = HTTP_LENGTH_UNKNOWN; + return 1; + } + if (Header->http_time < 0) + Header->http_time = 0; + if (Header->mime_type < 0) + Header->mime_type = MIME_UNKNOWN; + return 0; + } + + THttpHeaderParser HeaderParser; + THttpChunkParser ChunkParser; + States State; + long ChunkSize; + THttpHeader* Header; + int AssumeConnectionClosed; + bool HeadRequest; +}; + +template <int isReader, typename TCheck = TFakeCheck<>> +class THttpParserGeneric: public THttpParserBase, public TCheck { +protected: + long ParseGeneric(void*& buf, long& size) { + if (!size) { + switch (State) { + case hp_error: + case hp_eof: + break; + case hp_read_closed: + State = hp_eof; + break; + case hp_in_header: + Header->error = HTTP_HEADER_EOF; + State = hp_error; + break; + case hp_read_alive: + case hp_read_chunk: + if (HeadRequest) + State = hp_eof; + else { + Header->error = HTTP_MESSAGE_EOF; + State = hp_error; + } + break; + case hp_begin_chunk_header: + case hp_chunk_header: + if (HeadRequest) + State = hp_eof; + else { + Header->error = HTTP_CHUNK_EOF; + State = hp_error; + } + break; + } + return 0; + } + while (size) { + int ret; + + switch (State) { + case hp_error: + return 0; + + case hp_eof: + return 0; + + case hp_in_header: + if ((ret = HeaderParser.Execute(buf, size)) < 0) { + Header->error = HTTP_BAD_HEADER_STRING; + State = hp_error; + return 0; + } else if (ret == 2) { + Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1); + size -= long(HeaderParser.lastchar - (char*)buf + 1); + buf = HeaderParser.lastchar + 1; + State = CheckHeaders() ? hp_error + : Header->transfer_chunked ? hp_begin_chunk_header + : Header->content_length == 0 ? hp_eof + : Header->content_length > 0 ? hp_read_alive + : hp_read_closed; + if (State == hp_begin_chunk_header) { + // unget \n for chunk reader + buf = (char*)buf - 1; + size++; + } + if (isReader) + return size; + } else { + Header->header_size += size; + size = 0; + } + break; + + case hp_read_alive: + Header->entity_size += size; + if (Header->entity_size >= Header->content_length) { + State = hp_eof; + } + + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_read_closed: + Header->entity_size += size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_begin_chunk_header: + ChunkParser.Init(); + State = hp_chunk_header; + [[fallthrough]]; + + case hp_chunk_header: + if ((ret = ChunkParser.Execute(buf, size)) < 0) { + Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK); + State = hp_error; + return 0; + } else if (ret == 2) { + Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1); + size -= long(ChunkParser.lastchar - (char*)buf + 1); + buf = ChunkParser.lastchar + 1; + ChunkSize = ChunkParser.chunk_length; + Y_ASSERT(ChunkSize >= 0); + State = ChunkSize ? hp_read_chunk : hp_eof; + } else { + Header->entity_size += size; + size = 0; + } + break; + + case hp_read_chunk: + if (size >= ChunkSize) { + Header->entity_size += ChunkSize; + State = hp_begin_chunk_header; + TCheck::CheckDocPart(buf, ChunkSize, Header); + if (isReader) + return ChunkSize; + size -= ChunkSize; + buf = (char*)buf + ChunkSize; + } else { + Header->entity_size += size; + ChunkSize -= size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + } + break; + } + } + return size; + } +}; + +template <class TCheck = TFakeCheck<>> +class THttpParser: public THttpParserGeneric<0, TCheck> { + typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG! +public: + void Init(THttpHeader* H, bool head_request = false) { + TBaseT::Header = H; + TBaseT::HeaderParser.Init(TBaseT::Header); + TBaseT::State = TBaseT::hp_in_header; + TBaseT::AssumeConnectionClosed = 0; + TBaseT::HeadRequest = head_request; + } + + void Parse(void* buf, long size) { + TBaseT::ParseGeneric(buf, size); + } +}; + +class TMemoReader { +public: + int Init(void* buf, long bufsize) { + Buf = buf; + Bufsize = bufsize; + return 0; + } + long Read(void*& buf) { + Y_ASSERT(Bufsize >= 0); + if (!Bufsize) { + Bufsize = -1; + return 0; + } + buf = Buf; + long ret = Bufsize; + Bufsize = 0; + return ret; + } + +protected: + long Bufsize; + void* Buf; +}; + +template <class Reader> +class THttpReader: public THttpParserGeneric<1>, public Reader { + typedef THttpParserGeneric<1> TBaseT; + +public: + using TBaseT::AssumeConnectionClosed; + using TBaseT::Header; + using TBaseT::ParseGeneric; + using TBaseT::State; + + int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) { + Header = H; + Eoferr = 1; + Size = 0; + AssumeConnectionClosed = assumeConnectionClosed; + HeadRequest = headRequest; + return parsHeader ? ParseHeader() : SkipHeader(); + } + + long Read(void*& buf) { + long Chunk; + do { + if (!Size) { + if (Eoferr != 1) + return Eoferr; + else if ((Size = (long)Reader::Read(Ptr)) < 0) { + Header->error = HTTP_CONNECTION_LOST; + return Eoferr = -1; + } + } + Chunk = ParseGeneric(Ptr, Size); + buf = Ptr; + Ptr = (char*)Ptr + Chunk; + Size -= Chunk; + if (State == hp_eof) { + Size = 0; + Eoferr = 0; + } else if (State == hp_error) + return Eoferr = -1; + } while (!Chunk); + return Chunk; + } + +protected: + int ParseHeader() { + HeaderParser.Init(Header); + State = hp_in_header; + while (State == hp_in_header) { + if ((Size = (long)Reader::Read(Ptr)) < 0) + return Eoferr = -1; + ParseGeneric(Ptr, Size); + } + if (State == hp_error) + return Eoferr = -1; + if (State == hp_eof) + Eoferr = 0; + return 0; + } + + int SkipHeader() { + long hdrsize = Header->header_size; + while (hdrsize) { + if ((Size = (long)Reader::Read(Ptr)) <= 0) + return Eoferr = -1; + if (Size >= hdrsize) { + Size -= hdrsize; + Ptr = (char*)Ptr + hdrsize; + break; + } + hdrsize -= Size; + } + State = Header->transfer_chunked ? hp_begin_chunk_header + : Header->content_length == 0 ? hp_eof + : Header->content_length > 0 ? hp_read_alive + : hp_read_closed; + Header->entity_size = 0; + if (State == hp_eof) + Eoferr = 0; + else if (State == hp_begin_chunk_header) { + // unget \n for chunk reader + Ptr = (char*)Ptr - 1; + ++Size; + } + return 0; + } + + void* Ptr; + long Size; + int Eoferr; +}; |