aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/http/fetch/httpfetcher.h
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/http/fetch/httpfetcher.h
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/http/fetch/httpfetcher.h')
-rw-r--r--library/cpp/http/fetch/httpfetcher.h171
1 files changed, 171 insertions, 0 deletions
diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h
new file mode 100644
index 0000000000..7fc251afd2
--- /dev/null
+++ b/library/cpp/http/fetch/httpfetcher.h
@@ -0,0 +1,171 @@
+#pragma once
+
+#ifdef _MSC_VER
+#include <io.h>
+#endif
+
+#include <library/cpp/http/misc/httpdate.h>
+
+#include "httpagent.h"
+#include "httpparser.h"
+
+// Writer stub for callers that do not need the fetched bytes stored anywhere.
+struct TFakeBackup {
+ // Discards the data and reports success (0).
+ int Write(void* /*data*/, size_t /*length*/) {
+ constexpr int kSuccess = 0;
+ return kSuccess;
+ }
+};
+
+// Allocator stub backed by a single array of `bufsize` bytes stored inside the object.
+template <size_t bufsize = 5000>
+struct TFakeAlloc {
+ char buf[bufsize];
+
+ // Always hands out the same internal array and reports its full size through `real`;
+ // the requested minimum is ignored.
+ void* Grab(size_t /*min*/, size_t* real) {
+ *real = sizeof(buf);
+ return static_cast<void*>(buf);
+ }
+
+ // Nothing to trim or release: the storage is part of the object itself.
+ void Shrink(void* /*buf*/, size_t /*size*/) {
+ }
+};
+
+template <typename TAlloc = TFakeAlloc<>,
+ typename TCheck = TFakeCheck<>,
+ typename TWriter = TFakeBackup,
+ typename TAgent = THttpAgent<>>
+class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent { // fetcher assembled from four bases: response parser, buffer allocator (Grab/Shrink), writer (Write) and connection agent (RequestGet/read/Disconnect)
+public:
+ static const size_t TCP_MIN = 1500; // minimum free space wanted in the buffer before a read; below it a new buffer is grabbed
+ static int TerminateNow; // when non-zero, Fetch() stops after the current read and reports HTTP_INTERRUPTED
+
+ THttpFetcher() // default-constructs every base; no other state is set up here
+ : THttpParser<TCheck>()
+ , TAlloc()
+ , TWriter()
+ , TAgent()
+ {
+ }
+
+ virtual ~THttpFetcher() { // virtual so a derived fetcher can be destroyed through a THttpFetcher pointer
+ }
+
+ int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) { // sends a request for `path`, then reads and parses the response into `header`; returns 0, EIO or ENOMEM, while HTTP-level failures are reported through header->error
+ int ret = 0;
+ int fetcherr = 0; // errno value saved for the caller; written back to errno right before the final return
+
+ THttpParser<TCheck>::Init(header, head_request); // prepare the parser for a new response
+ const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme());
+ size_t schemelen = strlen(scheme);
+ if (*path == '/') { // path starts at the host root: build base as scheme + "://" + host + path
+ header->base = TStringBuf(scheme, schemelen);
+ header->base += TStringBuf("://", 3);
+ header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg);
+ header->base += path;
+ } else { // any other path is stored as the base verbatim, after a length check
+ if (strlen(path) >= FETCHER_URL_MAX) {
+ header->error = HTTP_URL_TOO_LARGE;
+ return 0; // note: the return value stays 0; the failure is visible only in header->error (errno is not touched on this path)
+ }
+ header->base = path;
+ }
+
+ if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) { // non-zero result means the request failed
+ header->error = (i16)ret; // the agent's code is stored as the header error
+ return 0; // as above: failure is reported through header->error, not the return value
+ }
+
+ bool inheader = 1; // stays true until the parser leaves the hp_in_header state
+ void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr; // bufptr: write position in the current buffer; buf: start of the current buffer; parsebuf: start of the bytes from the latest read
+ ssize_t got;
+ size_t buffree = 0, bufsize = 0, buflen = 0; // buffree: free bytes left in the current buffer; bufsize: total bytes received so far; buflen: size of the current buffer as reported by Grab
+ size_t maxsize = TCheck::GetMaxHeaderSize(); // size limit; starts as the header limit and becomes the body limit once the header is parsed
+ do {
+ if (buffree < TCP_MIN) { // too little room left (or no buffer yet): flush the current buffer and grab a new one
+ if (buf) {
+ TAlloc::Shrink(buf, buflen - buffree); // buflen - buffree is the number of bytes actually used in this buffer
+ if (TWriter::Write(buf, buflen - buffree) < 0) {
+ buf = nullptr; // already handled; keeps the post-loop Shrink/Write from running on this buffer again
+ ret = EIO;
+ break;
+ }
+ }
+ if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) { // a null buffer is treated as out-of-memory
+ ret = ENOMEM;
+ break;
+ }
+ bufptr = buf;
+ buffree = buflen;
+ }
+ if ((got = TAgent::read(bufptr, buffree)) < 0) { // negative result: read failed, classify by errno
+ fetcherr = errno; // save errno immediately, before any later call can change it
+ if (errno == EINTR)
+ header->error = HTTP_INTERRUPTED;
+ else if (errno == ETIMEDOUT)
+ header->error = HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING;
+ else
+ header->error = HTTP_CONNECTION_LOST;
+
+ break;
+ }
+
+ parsebuf = bufptr; // remember where the new bytes start, then advance the bookkeeping by `got`
+ bufptr = (char*)bufptr + got;
+ bufsize += got;
+ buffree -= got;
+
+ THttpParser<TCheck>::Parse(parsebuf, got); // feed only the newly received bytes to the parser
+
+ if (header->error)
+ break; // if ANY error occurred we stop downloading this file; otherwise the stream position would be unpredictable until the MAX size is reached
+
+ if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) { // first iteration in which the parser is no longer in the header
+ inheader = 0;
+ if (TCheck::Check(header)) // a non-zero result from the check stops the fetch
+ break;
+ if (header->header_size > (long)maxsize) { // maxsize still holds the header limit here
+ header->error = HTTP_HEADER_TOO_LARGE;
+ break;
+ }
+ }
+ if (!inheader) {
+ maxsize = TCheck::GetMaxBodySize(header); // from now on the limit is the body limit (re-queried on every iteration)
+ }
+ if (header->http_status >= HTTP_EXTENDED) // stop reading once the status is at or above HTTP_EXTENDED
+ break;
+ if (bufsize > maxsize) { // total bytes received exceed the limit currently in force
+ header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE;
+ break;
+ }
+ if (TerminateNow) { // external termination request
+ header->error = HTTP_INTERRUPTED;
+ break;
+ }
+ } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof); // keep reading while the parser state compares greater than hp_eof
+
+ i64 Adjustment = 0; // non-positive correction: minus the number of bytes received beyond the expected end of the response
+ if (!header->error) {
+ if (header->transfer_chunked) {
+ Adjustment = header->header_size + header->entity_size - bufsize - 1; // NOTE(review): the extra -1 presumably compensates for how entity_size is counted for chunked bodies - confirm
+ } else if (header->content_length >= 0) {
+ Adjustment = header->header_size + header->content_length - bufsize;
+ }
+ if (Adjustment > 0) // fewer bytes than expected: nothing to trim
+ Adjustment = 0;
+ }
+
+ if (buf) { // a buffer is still pending (it was not already flushed by a failed Write)
+ TAlloc::Shrink(buf, buflen - buffree + Adjustment); // the allocator gets the used length with the surplus bytes trimmed off
+
+ if (TWriter::Write(buf, buflen - buffree) < 0) // NOTE(review): Write gets the unadjusted length while Shrink above gets the adjusted one - confirm this asymmetry is intended
+ ret = EIO;
+ }
+ TCheck::CheckEndDoc(header); // runs on every path that got past the request, including error exits from the loop
+ if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) { // any failure, an extended status or a closed connection: drop the connection
+ TAgent::Disconnect();
+ if (!fetcherr) // no read error was recorded: take whatever errno holds after Disconnect()
+ fetcherr = errno;
+ }
+ errno = fetcherr; // publish the saved error code (0 when nothing was recorded) to the caller
+ return ret;
+ }
+};
+
+// Out-of-class definition of the termination flag; it starts cleared (0).
+template <typename TAllocator, typename TChecker, typename TBackup, typename TConnection>
+int THttpFetcher<TAllocator, TChecker, TBackup, TConnection>::TerminateNow = 0;