diff options
author | leo <leo@yandex-team.ru> | 2022-02-10 16:46:40 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:40 +0300 |
commit | 99609724f661f7e21d1cb08e8d80e87c3632fdb3 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/http/fetch | |
parent | 980edcd3304699edf9d4e4d6a656e585028e2a72 (diff) | |
download | ydb-99609724f661f7e21d1cb08e8d80e87c3632fdb3.tar.gz |
Restoring authorship annotation for <leo@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
-rw-r--r-- | library/cpp/http/fetch/exthttpcodes.cpp | 44 | ||||
-rw-r--r-- | library/cpp/http/fetch/exthttpcodes.h | 42 | ||||
-rw-r--r-- | library/cpp/http/fetch/http_digest.cpp | 238 | ||||
-rw-r--r-- | library/cpp/http/fetch/http_digest.h | 32 | ||||
-rw-r--r-- | library/cpp/http/fetch/http_socket.cpp | 242 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfetcher.h | 212 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm.h | 74 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 434 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpheader.h | 84 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpload.cpp | 376 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpload.h | 316 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpparser.h | 606 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpzreader.h | 440 | ||||
-rw-r--r-- | library/cpp/http/fetch/ya.make | 28 |
14 files changed, 1584 insertions, 1584 deletions
diff --git a/library/cpp/http/fetch/exthttpcodes.cpp b/library/cpp/http/fetch/exthttpcodes.cpp index ac0d3b359d..acc05650c8 100644 --- a/library/cpp/http/fetch/exthttpcodes.cpp +++ b/library/cpp/http/fetch/exthttpcodes.cpp @@ -1,18 +1,18 @@ #include "exthttpcodes.h" -#include <cstring> - +#include <cstring> + const ui16 CrazyServer = ShouldDelete | MarkSuspect; - + struct http_flag { ui16 http; ui16 flag; }; -static http_flag HTTP_FLAG[] = { +static http_flag HTTP_FLAG[] = { {HTTP_CONTINUE, MarkSuspect}, // 100 {HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101 {HTTP_PROCESSING, CrazyServer}, // 102 - + {HTTP_OK, ShouldReindex}, // 200 {HTTP_CREATED, CrazyServer}, // 201 {HTTP_ACCEPTED, ShouldDelete}, // 202 @@ -23,7 +23,7 @@ static http_flag HTTP_FLAG[] = { {HTTP_MULTI_STATUS, CrazyServer}, // 207 {HTTP_ALREADY_REPORTED, CrazyServer}, // 208 {HTTP_IM_USED, CrazyServer}, // 226 - + {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete}, // 300 {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir}, // 301 {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir}, // 302 @@ -32,7 +32,7 @@ static http_flag HTTP_FLAG[] = { {HTTP_USE_PROXY, ShouldDelete}, // 305 {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307 {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308 - + {HTTP_BAD_REQUEST, CrazyServer}, // 400 {HTTP_UNAUTHORIZED, ShouldDelete}, // 401 {HTTP_PAYMENT_REQUIRED, ShouldDelete}, // 402 @@ -53,7 +53,7 @@ static http_flag HTTP_FLAG[] = { {HTTP_EXPECTATION_FAILED, ShouldDelete}, // 417 {HTTP_I_AM_A_TEAPOT, CrazyServer}, // 418 {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete}, // 419 - + {HTTP_MISDIRECTED_REQUEST, CrazyServer}, // 421 {HTTP_UNPROCESSABLE_ENTITY, CrazyServer}, // 422 {HTTP_LOCKED, ShouldDelete}, // 423 @@ -62,7 +62,7 @@ static http_flag HTTP_FLAG[] = { {HTTP_PRECONDITION_REQUIRED, ShouldDelete}, // 428 {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429 {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete}, // 451 - + {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect}, // 500 {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect}, // 501 {HTTP_BAD_GATEWAY, MarkSuspect}, // 502 @@ -116,7 +116,7 @@ static http_flag HTTP_FLAG[] = { {HTTP_FETCHER_BAD_RESPONSE, 0}, // 1040 {HTTP_FETCHER_MB_ERROR, 0}, // 1041 {HTTP_SSL_CERT_ERROR, 0}, // 1042 - + // Custom (replace HTTP 200/304) {EXT_HTTP_MIRRMOVE, 0}, // 2000 {EXT_HTTP_MANUAL_DELETE, ShouldDelete}, // 2001 @@ -142,34 +142,34 @@ static http_flag HTTP_FLAG[] = { {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete}, // 2024 {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025 {0, 0}}; - + static ui16* prepare_flags(http_flag* arg) { - static ui16 flags[EXT_HTTP_CODE_MAX]; + static ui16 flags[EXT_HTTP_CODE_MAX]; http_flag* ptr; - size_t i; - + size_t i; + // устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше for (i = 0; i < EXT_HTTP_CODE_MAX; ++i) - flags[i] = CrazyServer; - + flags[i] = CrazyServer; + // устанавливаем флаги для перечисленных кодов for (ptr = arg; ptr->http; ++ptr) flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; - + // для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их // всем кодам не перечисленным в таблице выше for (size_t group = 0; group < 1000; group += 100) for (size_t j = group + 1; j < group + 100; ++j) flags[j] = flags[group]; - + // предыдущий цикл затер некоторые флаги перечисленные в таблице выше // восстанавливаем их for (ptr = arg; ptr->http; ++ptr) flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; - - return flags; -} - + + return flags; +} + ui16* http2status = prepare_flags(HTTP_FLAG); TStringBuf ExtHttpCodeStr(int code) noexcept { diff --git a/library/cpp/http/fetch/exthttpcodes.h b/library/cpp/http/fetch/exthttpcodes.h index 88bfe8d829..6b525052cd 100644 --- a/library/cpp/http/fetch/exthttpcodes.h +++ b/library/cpp/http/fetch/exthttpcodes.h @@ -1,9 +1,9 @@ #pragma once - -#include <util/system/defaults.h> + +#include <util/system/defaults.h> #include <library/cpp/http/misc/httpcodes.h> - -enum ExtHttpCodes { + +enum ExtHttpCodes { // Custom HTTP_EXTENDED = 1000, HTTP_BAD_RESPONSE_HEADER = 1000, @@ -50,8 +50,8 @@ enum ExtHttpCodes { HTTP_FETCHER_MB_ERROR = 1041, HTTP_SSL_CERT_ERROR = 1042, HTTP_PROXY_REQUEST_CANCELED = 1051, - - // Custom (replace HTTP 200/304) + + // Custom (replace HTTP 200/304) EXT_HTTP_EXT_SUCCESS_BEGIN = 2000, // to check if code variable is in success interval EXT_HTTP_MIRRMOVE = 2000, EXT_HTTP_MANUAL_DELETE = 2001, @@ -104,22 +104,22 @@ enum ExtHttpCodes { EXT_HTTP_WRONGMULTILANG = 3023, EXT_HTTP_SOFTMIRRORS = 3024, EXT_HTTP_BIGLEVEL = 3025, - - // fast robot codes - + + // fast robot codes + EXT_HTTP_FASTHOPS = 4000, EXT_HTTP_NODOC = 4001, EXT_HTTP_MAX -}; - -enum HttpFlags { - // connection - ShouldDisconnect = 1, +}; + +enum HttpFlags { + // connection + ShouldDisconnect = 1, ShouldRetry = 2, // UNUSED 4 - - // indexer + + // indexer ShouldReindex = 8, ShouldDelete = 16, CheckLocation = 32, @@ -129,13 +129,13 @@ enum HttpFlags { // UNUSED 512 MoveRedir = 1024, CanBeFake = 2048, -}; - +}; + const size_t EXT_HTTP_CODE_MAX = 1 << 12; - -static inline int Http2Status(int code) { + +static inline int Http2Status(int code) { extern ui16* http2status; return http2status[code & (EXT_HTTP_CODE_MAX - 1)]; -} +} TStringBuf ExtHttpCodeStr(int code) noexcept; diff --git a/library/cpp/http/fetch/http_digest.cpp b/library/cpp/http/fetch/http_digest.cpp index 1c8bc6f449..1eaa02b7f2 100644 --- a/library/cpp/http/fetch/http_digest.cpp +++ b/library/cpp/http/fetch/http_digest.cpp @@ -3,204 +3,204 @@ #include <library/cpp/digest/md5/md5.h> #include <util/stream/output.h> #include <util/stream/str.h> - -/************************************************************/ -/************************************************************/ -static const char* WWW_PREFIX = "Authorization: Digest "; - -/************************************************************/ + +/************************************************************/ +/************************************************************/ +static const char* WWW_PREFIX = "Authorization: Digest "; + +/************************************************************/ httpDigestHandler::httpDigestHandler() : User_(nullptr) , Password_(nullptr) , Nonce_(nullptr) , NonceCount_(0) , HeaderInstruction_(nullptr) -{ -} - -/************************************************************/ +{ +} + +/************************************************************/ httpDigestHandler::~httpDigestHandler() { - clear(); -} - -/************************************************************/ + clear(); +} + +/************************************************************/ void httpDigestHandler::clear() { free(Nonce_); free(HeaderInstruction_); User_ = Password_ = nullptr; Nonce_ = HeaderInstruction_ = nullptr; NonceCount_ = 0; -} - -/************************************************************/ +} + +/************************************************************/ void httpDigestHandler::setAuthorization(const char* user, const char* password) { - clear(); + clear(); if (user && password) { User_ = user; Password_ = password; - } -} - -/************************************************************/ + } +} + +/************************************************************/ const char* httpDigestHandler::getHeaderInstruction() const { return HeaderInstruction_; -} - -/************************************************************/ +} + +/************************************************************/ void httpDigestHandler::generateCNonce(char* outCNonce) { - if (!*outCNonce) + if (!*outCNonce) sprintf(outCNonce, "%ld", (long)time(nullptr)); -} - -/************************************************************/ +} + +/************************************************************/ inline void addMD5(MD5& ctx, const char* value) { - ctx.Update((const unsigned char*)(value), strlen(value)); -} - + ctx.Update((const unsigned char*)(value), strlen(value)); +} + inline void addMD5(MD5& ctx, const char* value, int len) { - ctx.Update((const unsigned char*)(value), len); -} - + ctx.Update((const unsigned char*)(value), len); +} + inline void addMD5Sep(MD5& ctx) { - addMD5(ctx, ":", 1); -} - -/************************************************************/ -/* calculate H(A1) as per spec */ + addMD5(ctx, ":", 1); +} + +/************************************************************/ +/* calculate H(A1) as per spec */ void httpDigestHandler::digestCalcHA1(const THttpAuthHeader& hd, char* outSessionKey, char* outCNonce) { - MD5 ctx; - ctx.Init(); + MD5 ctx; + ctx.Init(); addMD5(ctx, User_); addMD5Sep(ctx); addMD5(ctx, hd.realm); addMD5Sep(ctx); addMD5(ctx, Password_); - + if (hd.algorithm == 1) { //MD5-sess - unsigned char digest[16]; - ctx.Final(digest); - - generateCNonce(outCNonce); - - ctx.Init(); - ctx.Update(digest, 16); + unsigned char digest[16]; + ctx.Final(digest); + + generateCNonce(outCNonce); + + ctx.Init(); + ctx.Update(digest, 16); addMD5Sep(ctx); addMD5(ctx, hd.nonce); addMD5Sep(ctx); addMD5(ctx, outCNonce); - ctx.End(outSessionKey); - } - + ctx.End(outSessionKey); + } + ctx.End(outSessionKey); -}; - -/************************************************************/ -/* calculate request-digest/response-digest as per HTTP Digest spec */ +}; + +/************************************************************/ +/* calculate request-digest/response-digest as per HTTP Digest spec */ void httpDigestHandler::digestCalcResponse(const THttpAuthHeader& hd, const char* path, const char* method, const char* nonceCount, char* outResponse, char* outCNonce) { - char HA1[33]; - digestCalcHA1(hd, HA1, outCNonce); - - char HA2[33]; - MD5 ctx; - ctx.Init(); + char HA1[33]; + digestCalcHA1(hd, HA1, outCNonce); + + char HA2[33]; + MD5 ctx; + ctx.Init(); addMD5(ctx, method); addMD5Sep(ctx); addMD5(ctx, path); - //ignore auth-int - ctx.End(HA2); - - ctx.Init(); + //ignore auth-int + ctx.End(HA2); + + ctx.Init(); addMD5(ctx, HA1, 32); addMD5Sep(ctx); addMD5(ctx, Nonce_); addMD5Sep(ctx); - + if (hd.qop_auth) { - if (!*outCNonce) - generateCNonce(outCNonce); - + if (!*outCNonce) + generateCNonce(outCNonce); + addMD5(ctx, nonceCount, 8); addMD5Sep(ctx); addMD5(ctx, outCNonce); addMD5Sep(ctx); addMD5(ctx, "auth", 4); addMD5Sep(ctx); - } + } addMD5(ctx, HA2, 32); - ctx.End(outResponse); -} - -/************************************************************/ + ctx.End(outResponse); +} + +/************************************************************/ bool httpDigestHandler::processHeader(const THttpAuthHeader* header, const char* path, const char* method, const char* cnonce) { if (!User_ || !header || !header->use_auth || !header->realm || !header->nonce) - return false; - + return false; + if (Nonce_) { if (strcmp(Nonce_, header->nonce)) { free(Nonce_); Nonce_ = nullptr; NonceCount_ = 0; - } - } + } + } if (!Nonce_) { Nonce_ = strdup(header->nonce); NonceCount_ = 0; - } + } free(HeaderInstruction_); HeaderInstruction_ = nullptr; NonceCount_++; - - char nonceCount[20]; + + char nonceCount[20]; sprintf(nonceCount, "%08d", NonceCount_); - - char CNonce[50]; - if (cnonce) - strcpy(CNonce, cnonce); - else + + char CNonce[50]; + if (cnonce) + strcpy(CNonce, cnonce); + else CNonce[0] = 0; - - char response[33]; + + char response[33]; digestCalcResponse(*header, path, method, nonceCount, response, CNonce); - - //digest-response = 1#( username | realm | nonce | digest-uri - // | response | [ algorithm ] | [cnonce] | - // [opaque] | [message-qop] | - // [nonce-count] | [auth-param] ) - - TStringStream out; + + //digest-response = 1#( username | realm | nonce | digest-uri + // | response | [ algorithm ] | [cnonce] | + // [opaque] | [message-qop] | + // [nonce-count] | [auth-param] ) + + TStringStream out; out << WWW_PREFIX << "username=\"" << User_ << "\""; - out << ", realm=\"" << header->realm << "\""; - out << ", nonce=\"" << header->nonce << "\""; - out << ", uri=\"" << path << "\""; + out << ", realm=\"" << header->realm << "\""; + out << ", nonce=\"" << header->nonce << "\""; + out << ", uri=\"" << path << "\""; if (header->algorithm == 1) - out << ", algorithm=MD5-sess"; - else - out << ", algorithm=MD5"; - if (header->qop_auth) - out << ", qop=auth"; - out << ", nc=" << nonceCount; - if (CNonce[0]) - out << ", cnonce=\"" << CNonce << "\""; - out << ", response=\"" << response << "\""; - if (header->opaque) - out << ", opaque=\"" << header->opaque << "\""; - out << "\r\n"; - + out << ", algorithm=MD5-sess"; + else + out << ", algorithm=MD5"; + if (header->qop_auth) + out << ", qop=auth"; + out << ", nc=" << nonceCount; + if (CNonce[0]) + out << ", cnonce=\"" << CNonce << "\""; + out << ", response=\"" << response << "\""; + if (header->opaque) + out << ", opaque=\"" << header->opaque << "\""; + out << "\r\n"; + TString s_out = out.Str(); HeaderInstruction_ = strdup(s_out.c_str()); - - return true; -} - -/************************************************************/ -/************************************************************/ + + return true; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/http_digest.h b/library/cpp/http/fetch/http_digest.h index 018107c2e4..3b1872d70b 100644 --- a/library/cpp/http/fetch/http_digest.h +++ b/library/cpp/http/fetch/http_digest.h @@ -1,10 +1,10 @@ #pragma once - + #include "httpheader.h" -#include <util/system/compat.h> +#include <util/system/compat.h> #include <library/cpp/http/misc/httpcodes.h> - + class httpDigestHandler { protected: const char* User_; @@ -12,36 +12,36 @@ protected: char* Nonce_; int NonceCount_; char* HeaderInstruction_; - - void clear(); - + + void clear(); + void generateCNonce(char* outCNonce); - + void digestCalcHA1(const THttpAuthHeader& hd, char* outSessionKey, char* outCNonce); - + void digestCalcResponse(const THttpAuthHeader& hd, const char* method, const char* path, const char* nonceCount, char* outResponse, char* outCNonce); - + public: - httpDigestHandler(); - ~httpDigestHandler(); - + httpDigestHandler(); + ~httpDigestHandler(); + void setAuthorization(const char* user, const char* password); bool processHeader(const THttpAuthHeader* header, const char* path, const char* method, const char* cnonce = nullptr); - + bool empty() const { return (!User_); - } - + } + const char* getHeaderInstruction() const; -}; +}; diff --git a/library/cpp/http/fetch/http_socket.cpp b/library/cpp/http/fetch/http_socket.cpp index 870d927489..1524ef04a8 100644 --- a/library/cpp/http/fetch/http_socket.cpp +++ b/library/cpp/http/fetch/http_socket.cpp @@ -1,30 +1,30 @@ -#include "httpload.h" -#include "http_digest.h" - -/************************************************************/ - -#ifdef USE_GNUTLS - -#include <gcrypt.h> -#include <gnutls/gnutls.h> +#include "httpload.h" +#include "http_digest.h" + +/************************************************************/ + +#ifdef USE_GNUTLS + +#include <gcrypt.h> +#include <gnutls/gnutls.h> #include <util/network/init.h> -#include <util/network/socket.h> -#include <util/system/mutex.h> - -/********************************************************/ -// HTTPS handler is used as implementation of -// socketAbstractHandler for work through HTTPS protocol - +#include <util/network/socket.h> +#include <util/system/mutex.h> + +/********************************************************/ +// HTTPS handler is used as implementation of +// socketAbstractHandler for work through HTTPS protocol + class socketSecureHandler: public socketRegularHandler { protected: bool IsValid_; gnutls_session Session_; gnutls_certificate_credentials Credits_; - + public: socketSecureHandler(); virtual ~socketSecureHandler(); - + virtual bool Good(); virtual int Connect(const TAddrList& addrs, TDuration Timeout); virtual void Disconnect(); @@ -32,175 +32,175 @@ public: virtual bool send(const char* message, ssize_t messlen); virtual bool peek(); virtual ssize_t read(void* buffer, ssize_t buflen); -}; - -/********************************************************/ -/********************************************************/ +}; + +/********************************************************/ +/********************************************************/ static int gcry_pthread_mutex_init(void** priv) { - int err = 0; - - try { + int err = 0; + + try { TMutex* lock = new TMutex; - *priv = lock; + *priv = lock; } catch (...) { - err = -1; - } - - return err; -} - + err = -1; + } + + return err; +} + static int gcry_pthread_mutex_destroy(void** lock) { delete static_cast<TMutex*>(*lock); - return 0; -} - + return 0; +} + static int gcry_pthread_mutex_lock(void** lock) { static_cast<TMutex*>(*lock)->Acquire(); - - return 0; -} - + + return 0; +} + static int gcry_pthread_mutex_unlock(void** lock) { static_cast<TMutex*>(*lock)->Release(); - - return 0; -} - -static struct gcry_thread_cbs gcry_threads_pthread = + + return 0; +} + +static struct gcry_thread_cbs gcry_threads_pthread = { GCRY_THREAD_OPTION_PTHREAD, NULL, gcry_pthread_mutex_init, gcry_pthread_mutex_destroy, gcry_pthread_mutex_lock, gcry_pthread_mutex_unlock, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; - -/********************************************************/ + +/********************************************************/ struct https_initor { https_initor() { gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread); - gnutls_global_init(); + gnutls_global_init(); InitNetworkSubSystem(); - } - + } + ~https_initor() { - gnutls_global_deinit(); - } -}; - -static https_initor _initor; - -/********************************************************/ + gnutls_global_deinit(); + } +}; + +static https_initor _initor; + +/********************************************************/ socketSecureHandler::socketSecureHandler() : socketRegularHandler() , IsValid_(false) , Session_() , Credits_() -{ -} - -/********************************************************/ +{ +} + +/********************************************************/ socketSecureHandler::~socketSecureHandler() { if (IsValid_) - Disconnect(); -} - -/********************************************************/ + Disconnect(); +} + +/********************************************************/ bool socketSecureHandler::Good() { return Socket_.Good() && IsValid_; -} - -/********************************************************/ +} + +/********************************************************/ int socketSecureHandler::Connect(const TAddrList& addrs, TDuration Timeout) { IsValid_ = false; - + int ret = socketRegularHandler::Connect(addrs, Timeout); - if (ret) - return ret; - + if (ret) + return ret; + gnutls_certificate_allocate_credentials(&Credits_); gnutls_init(&Session_, GNUTLS_CLIENT); gnutls_set_default_priority(Session_); gnutls_credentials_set(Session_, GNUTLS_CRD_CERTIFICATE, Credits_); - + SOCKET fd = Socket_; gnutls_transport_set_ptr(Session_, (gnutls_transport_ptr)fd); - + ret = gnutls_handshake(Session_); - + if (ret < 0) { - fprintf(stderr, "*** Handshake failed\n"); - gnutls_perror(ret); - + fprintf(stderr, "*** Handshake failed\n"); + gnutls_perror(ret); + gnutls_deinit(Session_); if (Credits_) { gnutls_certificate_free_credentials(Credits_); Credits_ = 0; - } - return 1; - } - + } + return 1; + } + IsValid_ = true; return !IsValid_; -} - -/********************************************************/ +} + +/********************************************************/ void socketSecureHandler::Disconnect() { if (IsValid_) { gnutls_bye(Session_, GNUTLS_SHUT_RDWR); IsValid_ = false; gnutls_deinit(Session_); - } - + } + if (Credits_) { gnutls_certificate_free_credentials(Credits_); Credits_ = 0; - } - - socketRegularHandler::Disconnect(); -} - -/********************************************************/ + } + + socketRegularHandler::Disconnect(); +} + +/********************************************************/ void socketSecureHandler::shutdown() { -} - -/********************************************************/ +} + +/********************************************************/ bool socketSecureHandler::send(const char* message, ssize_t messlen) { if (!IsValid_) - return false; + return false; ssize_t rv = gnutls_record_send(Session_, message, messlen); - return rv >= 0; -} - -/********************************************************/ + return rv >= 0; +} + +/********************************************************/ bool socketSecureHandler::peek() { - //ssize_t rv = gnutls_record_check_pending(mSession); - //return rv>0; - return true; -} - -/********************************************************/ + //ssize_t rv = gnutls_record_check_pending(mSession); + //return rv>0; + return true; +} + +/********************************************************/ ssize_t socketSecureHandler::read(void* buffer, ssize_t buflen) { if (!IsValid_) - return false; + return false; return gnutls_record_recv(Session_, (char*)buffer, buflen); -} - -#endif - -/************************************************************/ +} + +#endif + +/************************************************************/ socketAbstractHandler* socketHandlerFactory::chooseHandler(const THttpURL& url) { if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTP) - return new socketRegularHandler; - + return new socketRegularHandler; + #ifdef USE_GNUTLS if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTPS) - return new socketSecureHandler; + return new socketSecureHandler; #endif - + return nullptr; -} - -/************************************************************/ -socketHandlerFactory socketHandlerFactory::sInstance; -/************************************************************/ +} + +/************************************************************/ +socketHandlerFactory socketHandlerFactory::sInstance; +/************************************************************/ diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h index 1c5b94a678..7fc251afd2 100644 --- a/library/cpp/http/fetch/httpfetcher.h +++ b/library/cpp/http/fetch/httpfetcher.h @@ -1,22 +1,22 @@ #pragma once - -#ifdef _MSC_VER + +#ifdef _MSC_VER #include <io.h> -#endif - +#endif + #include <library/cpp/http/misc/httpdate.h> - -#include "httpagent.h" -#include "httpparser.h" - -struct TFakeBackup { + +#include "httpagent.h" +#include "httpparser.h" + +struct TFakeBackup { int Write(void* /*buf*/, size_t /*size*/) { return 0; } -}; - -template <size_t bufsize = 5000> -struct TFakeAlloc { +}; + +template <size_t bufsize = 5000> +struct TFakeAlloc { void Shrink(void* /*buf*/, size_t /*size*/) { } void* Grab(size_t /*min*/, size_t* real) { @@ -24,17 +24,17 @@ struct TFakeAlloc { return buf; } char buf[bufsize]; -}; - +}; + template <typename TAlloc = TFakeAlloc<>, typename TCheck = TFakeCheck<>, - typename TWriter = TFakeBackup, + typename TWriter = TFakeBackup, typename TAgent = THttpAgent<>> class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent { -public: - static const size_t TCP_MIN = 1500; - static int TerminateNow; - +public: + static const size_t TCP_MIN = 1500; + static int TerminateNow; + THttpFetcher() : THttpParser<TCheck>() , TAlloc() @@ -47,54 +47,54 @@ public: } int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) { - int ret = 0; - int fetcherr = 0; - - THttpParser<TCheck>::Init(header, head_request); + int ret = 0; + int fetcherr = 0; + + THttpParser<TCheck>::Init(header, head_request); const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme()); size_t schemelen = strlen(scheme); - if (*path == '/') { + if (*path == '/') { header->base = TStringBuf(scheme, schemelen); header->base += TStringBuf("://", 3); header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg); header->base += path; - } else { + } else { if (strlen(path) >= FETCHER_URL_MAX) { header->error = HTTP_URL_TOO_LARGE; return 0; } header->base = path; - } - - if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) { - header->error = (i16)ret; - return 0; - } - - bool inheader = 1; + } + + if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) { + header->error = (i16)ret; + return 0; + } + + bool inheader = 1; void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr; - ssize_t got; - size_t buffree = 0, bufsize = 0, buflen = 0; - size_t maxsize = TCheck::GetMaxHeaderSize(); - do { - if (buffree < TCP_MIN) { - if (buf) { - TAlloc::Shrink(buf, buflen - buffree); - if (TWriter::Write(buf, buflen - buffree) < 0) { + ssize_t got; + size_t buffree = 0, bufsize = 0, buflen = 0; + size_t maxsize = TCheck::GetMaxHeaderSize(); + do { + if (buffree < TCP_MIN) { + if (buf) { + TAlloc::Shrink(buf, buflen - buffree); + if (TWriter::Write(buf, buflen - buffree) < 0) { buf = nullptr; - ret = EIO; - break; - } - } - if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) { - ret = ENOMEM; - break; - } - bufptr = buf; - buffree = buflen; - } - if ((got = TAgent::read(bufptr, buffree)) < 0) { - fetcherr = errno; + ret = EIO; + break; + } + } + if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) { + ret = ENOMEM; + break; + } + bufptr = buf; + buffree = buflen; + } + if ((got = TAgent::read(bufptr, buffree)) < 0) { + fetcherr = errno; if (errno == EINTR) header->error = HTTP_INTERRUPTED; else if (errno == ETIMEDOUT) @@ -102,43 +102,43 @@ public: else header->error = HTTP_CONNECTION_LOST; - break; - } - - parsebuf = bufptr; - bufptr = (char*)bufptr + got; - bufsize += got; - buffree -= got; - - THttpParser<TCheck>::Parse(parsebuf, got); - - if (header->error) + break; + } + + parsebuf = bufptr; + bufptr = (char*)bufptr + got; + bufsize += got; + buffree -= got; + + THttpParser<TCheck>::Parse(parsebuf, got); + + if (header->error) break; //if ANY error ocurred we will stop download that file or will have unprognosed stream position until MAX size reached - - if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) { - inheader = 0; - if (TCheck::Check(header)) - break; - if (header->header_size > (long)maxsize) { - header->error = HTTP_HEADER_TOO_LARGE; - break; - } + + if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) { + inheader = 0; + if (TCheck::Check(header)) + break; + if (header->header_size > (long)maxsize) { + header->error = HTTP_HEADER_TOO_LARGE; + break; + } } if (!inheader) { - maxsize = TCheck::GetMaxBodySize(header); - } - if (header->http_status >= HTTP_EXTENDED) - break; - if (bufsize > maxsize) { - header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE; - break; - } - if (TerminateNow) { - header->error = HTTP_INTERRUPTED; - break; - } - } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof); - + maxsize = TCheck::GetMaxBodySize(header); + } + if (header->http_status >= HTTP_EXTENDED) + break; + if (bufsize > maxsize) { + header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE; + break; + } + if (TerminateNow) { + header->error = HTTP_INTERRUPTED; + break; + } + } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof); + i64 Adjustment = 0; if (!header->error) { if (header->transfer_chunked) { @@ -150,22 +150,22 @@ public: Adjustment = 0; } - if (buf) { + if (buf) { TAlloc::Shrink(buf, buflen - buffree + Adjustment); - if (TWriter::Write(buf, buflen - buffree) < 0) - ret = EIO; - } - TCheck::CheckEndDoc(header); - if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) { - TAgent::Disconnect(); - if (!fetcherr) - fetcherr = errno; - } - errno = fetcherr; - return ret; - } -}; - -template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent> -int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0; + if (TWriter::Write(buf, buflen - buffree) < 0) + ret = EIO; + } + TCheck::CheckEndDoc(header); + if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) { + TAgent::Disconnect(); + if (!fetcherr) + fetcherr = errno; + } + errno = fetcherr; + return ret; + } +}; + +template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent> +int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0; diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h index 62a27b6561..c4abdcd0d2 100644 --- a/library/cpp/http/fetch/httpfsm.h +++ b/library/cpp/http/fetch/httpfsm.h @@ -1,13 +1,13 @@ #pragma once - + #include "httpheader.h" -#include <util/system/maxlen.h> +#include <util/system/maxlen.h> #include <util/datetime/parser.h> - + #include <time.h> -struct THttpHeaderParser { +struct THttpHeaderParser { static constexpr int ErrFirstlineTypeMismatch = -3; static constexpr int ErrHeader = -2; static constexpr int Err = -1; @@ -16,34 +16,34 @@ struct THttpHeaderParser { static constexpr int Accepted = 2; int Execute(const void* inBuf, size_t len) { - return execute((unsigned char*)inBuf, (int)len); - } - + return execute((unsigned char*)inBuf, (int)len); + } + int Execute(TStringBuf str) { return Execute(str.data(), str.size()); } int Init(THttpHeader* h) { int ret = Init((THttpBaseHeader*)(h)); - hd = h; - hd->Init(); + hd = h; + hd->Init(); hreflangpos = hd->hreflangs; hreflangspace = HREFLANG_MAX; return ret; - } - + } + int Init(THttpAuthHeader* h) { - int ret = Init((THttpHeader*)(h)); - auth_hd = h; - return ret; - } + int ret = Init((THttpHeader*)(h)); + auth_hd = h; + return ret; + } int Init(THttpRequestHeader* h) { int ret = Init((THttpBaseHeader*)(h)); request_hd = h; request_hd->Init(); return ret; } - + THttpHeader* hd; long I; int Dc; @@ -51,7 +51,7 @@ struct THttpHeaderParser { char buf[FETCHER_URL_MAX]; size_t buflen; char* lastchar; - + const unsigned char* langstart; size_t langlen; @@ -62,10 +62,10 @@ struct THttpHeaderParser { THttpAuthHeader* auth_hd; THttpRequestHeader* request_hd; - -private: + +private: THttpBaseHeader* base_hd; - int cs; + int cs; private: int Init(THttpBaseHeader* header) { @@ -78,27 +78,27 @@ private: } int execute(unsigned char* inBuf, int len); - void init(); -}; - -struct THttpChunkParser { + void init(); +}; + +struct THttpChunkParser { int Execute(const void* inBuf, int len) { - return execute((unsigned char*)inBuf, len); - } - - int Init() { - init(); - return 0; - } - + return execute((unsigned char*)inBuf, len); + } + + int Init() { + init(); + return 0; + } + int chunk_length; char* lastchar; long I; int Dc; i64 cnt64; - -private: - int cs; + +private: + int cs; int execute(unsigned char* inBuf, int len); - void init(); -}; + void init(); +}; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index 83557b144e..eab0328b18 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -1,70 +1,70 @@ -#include <stdio.h> -#include <time.h> - +#include <stdio.h> +#include <time.h> + #include <library/cpp/charset/doccodes.h> #include <library/cpp/charset/codepage.h> #include <library/cpp/http/misc/httpcodes.h> #include <util/datetime/base.h> #include <util/generic/ylimits.h> #include <algorithm> // max - + #include <library/cpp/http/fetch/httpheader.h> #include <library/cpp/http/fetch/httpfsm.h> - + #ifdef _MSC_VER #pragma warning(disable: 4702) // unreachable code #endif #define c(i) I = i; #define m(i) I = std::max(I, (long)i); - -static inline int X(unsigned char c) { - return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); -} - -template <typename x> -static inline void guard(x &val) { - val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 -} - -template <typename x> -static inline void setguarded(x &val, long cnt) { - val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; -} - -//////////////////////////////////////////////////////////////////// -/// HTTP PARSER -//////////////////////////////////////////////////////////////////// - -%%{ -machine http_header_parser; - + +static inline int X(unsigned char c) { + return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); +} + +template <typename x> +static inline void guard(x &val) { + val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 +} + +template <typename x> +static inline void setguarded(x &val, long cnt) { + val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; +} + +//////////////////////////////////////////////////////////////////// +/// HTTP PARSER +//////////////////////////////////////////////////////////////////// + +%%{ +machine http_header_parser; + include HttpDateTimeParser "../../../../util/datetime/parser.rl6"; -alphtype unsigned char; - -################# 2.2 Basic Rules ################# -eol = '\r'? '\n'; -ws = [ \t]; -lw = '\r'? '\n'? ws; +alphtype unsigned char; + +################# 2.2 Basic Rules ################# +eol = '\r'? '\n'; +ws = [ \t]; +lw = '\r'? '\n'? ws; separator = [()<>@,;:\\"/\[\]?={}]; -token_char = [!-~] - separator; # http tokens chars -url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars -text_char = ws | 33..126 | 128..255; -any_text_char = any - [\r\n]; - -lws = lw*; -eoh = lws eol; -token = token_char+; -ex_token = (token_char | ws)* token_char; -text = (text_char | lw)*; -any_text = (any_text_char | lw)*; -def = lws ':' lws; - +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..126 | 128..255; +any_text_char = any - [\r\n]; + +lws = lw*; +eoh = lws eol; +token = token_char+; +ex_token = (token_char | ws)* token_char; +text = (text_char | lw)*; +any_text = (any_text_char | lw)*; +def = lws ':' lws; + action clear_buf { buflen = 0; } action update_buf { if (buflen < sizeof(buf)) buf[buflen++] = fc; } - -################################################### + +################################################### ############ response status line ################# action set_minor { base_hd->http_minor = I; } action set_status { @@ -75,14 +75,14 @@ action set_status { return -3; } } - + status_code = int3; http_major = int; http_minor = int; reason_phrase = ws+ text_char*; http_version = "http/"i http_major '.' http_minor %set_minor; response_status_line = http_version ws+ status_code reason_phrase? eol %set_status; - + ############ request status line ################# action set_request_uri { if (request_hd && buflen < FETCHER_URL_MAX) { @@ -116,44 +116,44 @@ request_uri = (token_char | separator)+ >clear_buf $update_buf %set_request_uri; request_status_line = http_method ws+ request_uri ws+ http_version eoh; -################# connection ###################### +################# connection ###################### action beg_connection { guard(base_hd->connection_closed); I = -1; } action set_connection { setguarded(base_hd->connection_closed, I); } - -c_token = "close"i %{m(1)} - | "keep-alive"i %{m(0)}; -c_tokenlist = c_token (lws ',' lws c_token)?; + +c_token = "close"i %{m(1)} + | "keep-alive"i %{m(0)}; +c_tokenlist = c_token (lws ',' lws c_token)?; connection = "connection"i def %beg_connection c_tokenlist eoh %set_connection; - -################# content-encoding ################ + +################# content-encoding ################ action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; } action set_content_encoding { base_hd->compression_method = ((base_hd->compression_method == HTTP_COMPRESSION_UNSET || base_hd->compression_method == I) ? I : (int)HTTP_COMPRESSION_ERROR); } - -ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} - | "gzip"i %{c(HTTP_COMPRESSION_GZIP)} - | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)} - | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)} - | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)} - | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)}; + +ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} + | "gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)} + | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)} + | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)}; content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding; - -################# transfer-encoding ############### + +################# transfer-encoding ############### action beg_encoding { guard(base_hd->transfer_chunked); } action set_encoding { setguarded(base_hd->transfer_chunked, I); } - -e_tokenlist = "identity"i %{c(0)} - | "chunked"i %{c(1)}; + +e_tokenlist = "identity"i %{c(0)} + | "chunked"i %{c(1)}; transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding; - -################# content-length ################## + +################# content-length ################## action beg_content_length { guard(base_hd->content_length); } action set_content_length { setguarded(base_hd->content_length, I); } - + content_length = "content-length"i def %beg_content_length int eoh %set_content_length; - + ################# content-range ################### action beg_content_range_start { guard(base_hd->content_range_start); I = -1; } action set_content_range_start { setguarded(base_hd->content_range_start, I); } @@ -166,7 +166,7 @@ content_range = "content-range"i def "bytes"i sp %beg_content_range_start int %beg_content_range_end int '/' %set_content_range_end %beg_content_range_el int eoh %set_content_range_el; -################# accept-ranges ################### +################# accept-ranges ################### action beg_accept_ranges { if (hd) { guard(hd->accept_ranges); @@ -174,21 +174,21 @@ action beg_accept_ranges { } } action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); } - -ar_tokenlist = "bytes"i %{c(1)} - | "none"i %{c(0)}; + +ar_tokenlist = "bytes"i %{c(1)} + | "none"i %{c(0)}; accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges; - -################# content-type #################### + +################# content-type #################### action beg_mime { guard(base_hd->mime_type); } action set_mime { setguarded(base_hd->mime_type, I); } action set_charset { if (buflen < FETCHER_URL_MAX) { - buf[buflen++] = 0; + buf[buflen++] = 0; base_hd->charset = EncodingHintByName((const char*)buf); - } -} - + } +} + mime_type = "text/plain"i %{c(MIME_TEXT)} | "text/html"i %{c(MIME_HTML)} | "application/pdf"i %{c(MIME_PDF)} @@ -234,36 +234,36 @@ mime_type = "text/plain"i %{c(MIME_TEXT)} charset_name = token_char+ >clear_buf $update_buf; mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2 - | token ws* '=' ws* '"'? token '"'? @1 - | text $0; -mime_parms = (lws ';' lws mime_param)*; + | token ws* '=' ws* '"'? token '"'? @1 + | text $0; +mime_parms = (lws ';' lws mime_param)*; content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime; - -################# last modified ################### + +################# last modified ################### action beg_modtime { guard(base_hd->http_time); } action set_modtime { setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1)); } - + last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime; - -################# location ######################## + +################# location ######################## action set_location { while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) { buflen --; } if (hd && buflen < FETCHER_URL_MAX) { hd->location = TStringBuf(buf, buflen); - } -} - + } +} + action set_status_303{ if (hd) hd->http_status = 303; } - + url = url_char+ >clear_buf $update_buf; loc_url = any_text_char+ >clear_buf $update_buf; location = "location"i def loc_url eoh %set_location; refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; - + ################# x-robots-tag ################ action set_x_robots { if (hd && AcceptingXRobots) { @@ -349,56 +349,56 @@ action set_squid_error { squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; -################# auth ######################## +################# auth ######################## action init_auth { - if (auth_hd) - auth_hd->use_auth=true; -} - + if (auth_hd) + auth_hd->use_auth=true; +} + action update_auth_buf - { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } - -quoted_str = /"/ (text_char - /"/)* /"/ >2; + { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } + +quoted_str = /"/ (text_char - /"/)* /"/ >2; auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2; - -# do not support auth-int, too heavy procedure - -qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; - -qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); - -auth_good_param = ( "nonce"i /=/ auth_quoted_str ) + +# do not support auth-int, too heavy procedure + +qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; + +qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); + +auth_good_param = ( "nonce"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->nonce = strdup((const char*)buf); - }} - | ( "realm"i /=/ auth_quoted_str ) + buf[buflen++] = 0; + auth_hd->nonce = strdup((const char*)buf); + }} + | ( "realm"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->realm = strdup((const char*)buf); - }} - | ( "opaque"i /=/ auth_quoted_str ) + buf[buflen++] = 0; + auth_hd->realm = strdup((const char*)buf); + }} + | ( "opaque"i /=/ auth_quoted_str ) %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { - buf[buflen++] = 0; - auth_hd->opaque = strdup((const char*)buf); - }} - | "stale"i /=/ "true"i - %{if (auth_hd) auth_hd->stale = true; } - | "algorithm"i /=/ "md5"i /-/ "sess"i - %{if (auth_hd) auth_hd->algorithm = 1; } - | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); - -auth_param = auth_good_param @1 | - ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) - /=/ (token | quoted_str ) ) $0; - -auth_params = auth_param ( ws* /,/ ws* auth_param )*; - + buf[buflen++] = 0; + auth_hd->opaque = strdup((const char*)buf); + }} + | "stale"i /=/ "true"i + %{if (auth_hd) auth_hd->stale = true; } + | "algorithm"i /=/ "md5"i /-/ "sess"i + %{if (auth_hd) auth_hd->algorithm = 1; } + | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); + +auth_param = auth_good_param @1 | + ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) + /=/ (token | quoted_str ) ) $0; + +auth_params = auth_param ( ws* /,/ ws* auth_param )*; + digest_challenge = ("digest"i %init_auth ws+ auth_params) | - ((token-"digest"i) text); - -auth = "www-authenticate"i def digest_challenge eoh; - + ((token-"digest"i) text); + +auth = "www-authenticate"i def digest_challenge eoh; + ###################### host ####################### action set_host { if (request_hd && buflen < HOST_MAX) { @@ -562,8 +562,8 @@ action set_request_priority { request_priority = "x-yandex-request-priority"i def int eoh %set_request_priority; -################# message header ################## -other_header = ( ex_token - "www-authenticate"i ) def any_text eoh; +################# message header ################## +other_header = ( ex_token - "www-authenticate"i ) def any_text eoh; message_header = other_header $0 | connection @1 | content_encoding @1 @@ -595,90 +595,90 @@ request_header = message_header $0 | request_cache_control @1 | response_timeout @1 | request_priority @1; - -################# main ############################ + +################# main ############################ action accepted { lastchar = (char*)fpc; return 2; } - + main := ((response_status_line ('\r'? response_header)*) | (request_status_line ('\r' ? request_header)*)) eol @accepted; - -}%% - -%% write data; - -int THttpHeaderParser::execute(unsigned char *inBuf, int len) { - const unsigned char *p = inBuf; - const unsigned char *pe = p + len; - %% write exec; - if (cs == http_header_parser_error) - return -1; - else if (cs == http_header_parser_first_final) - return 0; - else - return 1; -} - -void THttpHeaderParser::init() { - %% write init; -} - -%%{ -machine http_chunk_parser; - -alphtype unsigned char; - + +}%% + +%% write data; + +int THttpHeaderParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_header_parser_error) + return -1; + else if (cs == http_header_parser_first_final) + return 0; + else + return 1; +} + +void THttpHeaderParser::init() { + %% write init; +} + +%%{ +machine http_chunk_parser; + +alphtype unsigned char; + action clear_hex { cnt64 = 0; } action update_hex { cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; } action set_chunk { chunk_length = static_cast<int>(cnt64); } action accepted { lastchar = (char*)fpc; return 2; } - -eol = '\r'? '\n'; -ws = [ \t]; -sp = ' '; -lw = '\r'? '\n'? ws; -separator = [()<>@,;:\\"/\[\]?={}]; -token_char = [!-~] - separator; # http tokens chars -url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars -text_char = ws | 33..127 | 160..255; - -lws = lw*; -eoh = lws eol; -token = token_char+; -text = (text_char | lw)*; -def = lws ':' lws; - + +eol = '\r'? '\n'; +ws = [ \t]; +sp = ' '; +lw = '\r'? '\n'? ws; +separator = [()<>@,;:\\"/\[\]?={}]; +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..127 | 160..255; + +lws = lw*; +eoh = lws eol; +token = token_char+; +text = (text_char | lw)*; +def = lws ':' lws; + hex = (xdigit+) >clear_hex $update_hex; -quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; - -chunk_ext_val = token | quoted_string; -chunk_ext_name = token; -chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; - -entity_header = token def text eoh; -trailer = entity_header*; - +quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; + +chunk_ext_val = token | quoted_string; +chunk_ext_name = token; +chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; + +entity_header = token def text eoh; +trailer = entity_header*; + chunk = (hex - '0'+) chunk_extension? %set_chunk; -last_chunk = '0'+ chunk_extension? eol trailer; +last_chunk = '0'+ chunk_extension? eol trailer; main := eol (chunk $0 | last_chunk @1) eol @accepted; - -}%% - -%% write data; - -int THttpChunkParser::execute(unsigned char *inBuf, int len) { - const unsigned char *p = inBuf; - const unsigned char *pe = p + len; - %% write exec; - if (cs == http_chunk_parser_error) - return -1; - else if (cs == http_chunk_parser_first_final) - return 0; - else - return 1; -} - -void THttpChunkParser::init() { - chunk_length = 0; - %% write init; -} + +}%% + +%% write data; + +int THttpChunkParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_chunk_parser_error) + return -1; + else if (cs == http_chunk_parser_first_final) + return 0; + else + return 1; +} + +void THttpChunkParser::init() { + chunk_length = 0; + %% write init; +} diff --git a/library/cpp/http/fetch/httpheader.h b/library/cpp/http/fetch/httpheader.h index 20f8e0956b..b2810bbd41 100644 --- a/library/cpp/http/fetch/httpheader.h +++ b/library/cpp/http/fetch/httpheader.h @@ -1,5 +1,5 @@ #pragma once - + #include "exthttpcodes.h" #include <library/cpp/mime/types/mime.h> @@ -11,11 +11,11 @@ #include <util/system/maxlen.h> #include <ctime> -#include <cstdio> -#include <cstdlib> -#include <cstring> +#include <cstdio> +#include <cstdlib> +#include <cstring> #include <algorithm> - + // This is ugly solution but here a lot of work to do it the right way. #define FETCHER_URL_MAX 8192 @@ -29,16 +29,16 @@ extern const i32 DEFAULT_RESPONSE_TIMEOUT; /// == -1 #define MAX_LANGREGION_LEN 4 #define MAXWORD_LEN 55 -enum HTTP_COMPRESSION { +enum HTTP_COMPRESSION { HTTP_COMPRESSION_UNSET = 0, HTTP_COMPRESSION_ERROR = 1, - HTTP_COMPRESSION_IDENTITY = 2, + HTTP_COMPRESSION_IDENTITY = 2, HTTP_COMPRESSION_GZIP = 3, HTTP_COMPRESSION_DEFLATE = 4, - HTTP_COMPRESSION_COMPRESS = 5, + HTTP_COMPRESSION_COMPRESS = 5, HTTP_COMPRESSION_MAX = 6 -}; - +}; + enum HTTP_METHOD { HTTP_METHOD_UNDEFINED = -1, HTTP_METHOD_OPTIONS, @@ -78,25 +78,25 @@ public: TString base; public: - void Init() { + void Init() { error = 0; - header_size = 0; - entity_size = 0; - content_length = -1; - http_time = -1; - http_minor = -1; - mime_type = -1; - charset = -1; - compression_method = HTTP_COMPRESSION_UNSET; - transfer_chunked = -1; + header_size = 0; + entity_size = 0; + content_length = -1; + http_time = -1; + http_minor = -1; + mime_type = -1; + charset = -1; + compression_method = HTTP_COMPRESSION_UNSET; + transfer_chunked = -1; connection_closed = HTTP_CONNECTION_UNDEFINED; content_range_start = -1; content_range_end = -1; content_range_entity_length = -1; base.clear(); - } - - void Print() const { + } + + void Print() const { printf("content_length: %" PRIi64 "\n", content_length); printf("http_time: %" PRIi64 "\n", http_time); printf("http_minor: %" PRIi8 "\n", http_minor); @@ -110,22 +110,22 @@ public: printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length); printf("base: \"%s\"\n", base.c_str()); printf("error: %" PRIi16 "\n", error); - } - - int SetBase(const char* path, + } + + int SetBase(const char* path, const char* hostNamePtr = nullptr, int hostNameLength = 0) { if (*path == '/') { base = "http://"; base += TStringBuf(hostNamePtr, hostNameLength); base += path; - } else { + } else { base = path; - } - return error; - } -}; - + } + return error; + } +}; + enum { HREFLANG_MAX = FETCHER_URL_MAX * 2 }; /// Class represents Http Response Header. struct THttpHeader: public THttpBaseHeader { @@ -165,7 +165,7 @@ public: }; struct THttpRequestHeader: public THttpBaseHeader { -public: +public: TString request_uri; char host[HOST_MAX]; char from[MAXWORD_LEN]; @@ -184,7 +184,7 @@ public: THttpRequestHeader() { Init(); } - + void Init() { request_uri.clear(); host[0] = 0; @@ -201,7 +201,7 @@ public: if_modified_since = DEFAULT_IF_MODIFIED_SINCE; THttpBaseHeader::Init(); } - + void Print() const { THttpBaseHeader::Print(); printf("request_uri: \"%s\"\n", request_uri.c_str()); @@ -213,7 +213,7 @@ public: printf("max_age: %" PRIi32 "\n", max_age); printf("if_modified_since: %" PRIi64 "\n", if_modified_since); } - + /// It doesn't care about errors in request or headers, where /// request_uri equals to '*'. /// This returns copy of the string, which you have to delete. @@ -225,20 +225,20 @@ public: url = HTTP_PREFIX; url += host; url += request_uri; - } + } return url; } - + char* GetUrl(char* buffer, size_t size) { if (host[0] == 0 || !strcmp(host, "")) { strlcpy(buffer, request_uri.c_str(), size); } else { snprintf(buffer, size, "http://%s%s", host, request_uri.c_str()); - } + } return buffer; } }; - + class THttpAuthHeader: public THttpHeader { public: char* realm; @@ -282,6 +282,6 @@ public: printf("stale: %d\n", stale); printf("algorithm: %d\n", algorithm); printf("qop_auth: %d\n", qop_auth); - } + } } -}; +}; diff --git a/library/cpp/http/fetch/httpload.cpp b/library/cpp/http/fetch/httpload.cpp index f944d7906a..82ea8900b5 100644 --- a/library/cpp/http/fetch/httpload.cpp +++ b/library/cpp/http/fetch/httpload.cpp @@ -1,7 +1,7 @@ -#include "httpload.h" - -/************************************************************/ -/************************************************************/ +#include "httpload.h" + +/************************************************************/ +/************************************************************/ httpAgentReader::httpAgentReader(httpSpecialAgent& agent, const char* baseUrl, bool assumeConnectionClosed, @@ -13,109 +13,109 @@ httpAgentReader::httpAgentReader(httpSpecialAgent& agent, , BufPtr_(Buffer_) , BufSize_(bufSize) , BufRest_(0) -{ - HeadRequest = false; +{ + HeadRequest = false; Header = &Header_; - if (use_auth) + if (use_auth) HeaderParser.Init(&Header_); - else - HeaderParser.Init(Header); + else + HeaderParser.Init(Header); setAssumeConnectionClosed(assumeConnectionClosed ? 1 : 0); Header_.SetBase(baseUrl); - + if (Header_.error) - State = hp_error; - else - State = hp_in_header; -} - -/************************************************************/ + State = hp_error; + else + State = hp_in_header; +} + +/************************************************************/ httpAgentReader::~httpAgentReader() { delete[] Buffer_; -} - -/************************************************************/ +} + +/************************************************************/ void httpAgentReader::readBuf() { assert(BufRest_ == 0); if (!BufPtr_) { BufRest_ = -1; - return; - } - + return; + } + BufRest_ = Agent_.read(Buffer_, BufSize_); if (BufRest_ <= 0) { BufRest_ = -1; BufPtr_ = nullptr; - } else { + } else { BufPtr_ = Buffer_; - //cout << "BUF: " << mBuffer << endl << endl; - } -} - -/************************************************************/ + //cout << "BUF: " << mBuffer << endl << endl; + } +} + +/************************************************************/ const THttpHeader* httpAgentReader::readHeader() { while (State == hp_in_header) { if (!step()) { Header_.error = HTTP_CONNECTION_LOST; return nullptr; - } + } ParseGeneric(BufPtr_, BufRest_); - } + } if (State == hp_eof || State == hp_error) { BufPtr_ = nullptr; BufRest_ = -1; - } + } if (State == hp_error || Header_.error) return nullptr; return &Header_; -} - -/************************************************************/ +} + +/************************************************************/ long httpAgentReader::readPortion(void*& buf) { assert(State != hp_in_header); - - long Chunk = 0; + + long Chunk = 0; do { if (BufSize_ == 0 && !BufPtr_) - return 0; - - if (!step()) - return 0; - + return 0; + + if (!step()) + return 0; + Chunk = ParseGeneric(BufPtr_, BufRest_); buf = BufPtr_; - + if (State == hp_error && Header_.entity_size > Header_.content_length) { Chunk -= (Header_.entity_size - Header_.content_length); BufPtr_ = (char*)BufPtr_ + Chunk; BufRest_ = 0; State = hp_eof; Header_.error = 0; - break; - } - + break; + } + BufPtr_ = (char*)BufPtr_ + Chunk; BufRest_ -= Chunk; - + if (State == hp_eof || State == hp_error) { BufRest_ = -1; BufPtr_ = nullptr; - } - } while (!Chunk); - return Chunk; -} - -/************************************************************/ + } + } while (!Chunk); + return Chunk; +} + +/************************************************************/ bool httpAgentReader::skipTheRest() { void* b; - while (!eof()) - readPortion(b); + while (!eof()) + readPortion(b); return (State == hp_eof); -} - -/************************************************************/ -/************************************************************/ +} + +/************************************************************/ +/************************************************************/ httpLoadAgent::httpLoadAgent(bool handleAuthorization, socketHandlerFactory& factory) : Factory_(factory) @@ -126,82 +126,82 @@ httpLoadAgent::httpLoadAgent(bool handleAuthorization, , Headers_() , ErrCode_(0) , RealHost_(nullptr) -{ -} - -/************************************************************/ +{ +} + +/************************************************************/ httpLoadAgent::~httpLoadAgent() { delete Reader_; free(RealHost_); -} - -/************************************************************/ +} + +/************************************************************/ void httpLoadAgent::clearReader() { if (Reader_) { - bool opened = false; + bool opened = false; if (PersistentConn_) { const THttpHeader* H = Reader_->readHeader(); if (H && !H->connection_closed) { Reader_->skipTheRest(); - opened = true; - } - } - if (!opened) - Disconnect(); + opened = true; + } + } + if (!opened) + Disconnect(); delete Reader_; Reader_ = nullptr; - } + } ErrCode_ = 0; -} -/************************************************************/ +} +/************************************************************/ void httpLoadAgent::setRealHost(const char* hostname) { free(RealHost_); - if (hostname) + if (hostname) RealHost_ = strdup(hostname); - else + else RealHost_ = nullptr; ErrCode_ = 0; -} - -/************************************************************/ +} + +/************************************************************/ void httpLoadAgent::setIMS(const char* ifModifiedSince) { - char ims_buf[100]; - snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n", - ifModifiedSince); + char ims_buf[100]; + snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n", + ifModifiedSince); Headers_.push_back(ims_buf); -} - -/************************************************************/ +} + +/************************************************************/ void httpLoadAgent::addHeaderInstruction(const char* instr) { Headers_.push_back(instr); -} - -/************************************************************/ +} + +/************************************************************/ void httpLoadAgent::dropHeaderInstructions() { Headers_.clear(); -} - -/************************************************************/ +} + +/************************************************************/ bool httpLoadAgent::startRequest(const THttpURL& url, bool persistent, const TAddrList& addrs) -{ - clearReader(); +{ + clearReader(); ErrCode_ = 0; - + URL_.Clear(); URL_ = url; PersistentConn_ = persistent; if (!URL_.IsValidAbs()) - return false; + return false; if (!HandleAuthorization_ && !URL_.IsNull(THttpURL::FlagAuth)) - return false; - + return false; + return doSetHost(addrs) && doStartRequest(); -} - -/************************************************************/ +} + +/************************************************************/ bool httpLoadAgent::startRequest(const char* url, const char* url_to_merge, bool persistent, @@ -226,148 +226,148 @@ bool httpLoadAgent::startRequest(const char* url, const char* url_to_merge, bool persistent, ui32 ip) { - clearReader(); - + clearReader(); + URL_.Clear(); PersistentConn_ = persistent; - - long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; + + long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; if (HandleAuthorization_) - flags |= THttpURL::FeatureAuthSupported; - + flags |= THttpURL::FeatureAuthSupported; + if (URL_.Parse(url, flags, url_to_merge) || !URL_.IsValidGlobal()) - return false; - + return false; + return doSetHost(TAddrList::MakeV4Addr(ip, URL_.GetPort())) && doStartRequest(); -} - -/************************************************************/ +} + +/************************************************************/ bool httpLoadAgent::doSetHost(const TAddrList& addrs) { socketAbstractHandler* h = Factory_.chooseHandler(URL_); - if (!h) - return false; - Socket.setHandler(h); - + if (!h) + return false; + Socket.setHandler(h); + if (addrs.size()) { ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost), URL_.GetPort(), addrs); - } else { + } else { ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost), URL_.GetPort()); - } + } if (ErrCode_) - return false; - + return false; + if (RealHost_) { - free(Hostheader); + free(Hostheader); Hostheader = (char*)malloc(strlen(RealHost_) + 20); sprintf(Hostheader, "Host: %s\r\n", RealHost_); - } - + } + if (!URL_.IsNull(THttpURL::FlagAuth)) { if (!HandleAuthorization_) { ErrCode_ = HTTP_UNAUTHORIZED; - return false; - } - + return false; + } + Digest_.setAuthorization(URL_.Get(THttpURL::FieldUsername), URL_.Get(THttpURL::FieldPassword)); - } - - return true; -} - -/************************************************************/ + } + + return true; +} + +/************************************************************/ bool httpLoadAgent::setHost(const char* host_url, const TAddrList& addrs) { - clearReader(); - + clearReader(); + URL_.Clear(); PersistentConn_ = true; - - long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; + + long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; if (HandleAuthorization_) - flags |= THttpURL::FeatureAuthSupported; - + flags |= THttpURL::FeatureAuthSupported; + if (URL_.Parse(host_url, flags) || !URL_.IsValidGlobal()) - return false; - + return false; + return doSetHost(addrs); -} - -/************************************************************/ +} + +/************************************************************/ bool httpLoadAgent::startOneRequest(const char* local_url) { - clearReader(); - - THttpURL lURL; + clearReader(); + + THttpURL lURL; if (lURL.Parse(local_url, THttpURL::FeaturesNormalizeSet) || lURL.IsValidGlobal()) - return false; - + return false; + URL_.SetInMemory(THttpURL::FieldPath, lURL.Get(THttpURL::FieldPath)); URL_.SetInMemory(THttpURL::FieldQuery, lURL.Get(THttpURL::FieldQuery)); URL_.Rewrite(); - - return doStartRequest(); -} - -/************************************************************/ + + return doStartRequest(); +} + +/************************************************************/ bool httpLoadAgent::doStartRequest() { TString urlStr = URL_.PrintS(THttpURL::FlagPath | THttpURL::FlagQuery); - if (!urlStr) - urlStr = "/"; - + if (!urlStr) + urlStr = "/"; + for (int step = 0; step < 10; step++) { const char* digestHeader = Digest_.getHeaderInstruction(); - + unsigned i = (digestHeader) ? 2 : 1; - const char** headers = + const char** headers = (const char**)(alloca((i + Headers_.size()) * sizeof(char*))); - + for (i = 0; i < Headers_.size(); i++) headers[i] = Headers_[i].c_str(); - if (digestHeader) - headers[i++] = digestHeader; + if (digestHeader) + headers[i++] = digestHeader; headers[i] = nullptr; - + ErrCode_ = RequestGet(urlStr.c_str(), headers, PersistentConn_); - + if (ErrCode_) { - Disconnect(); - return false; - } - + Disconnect(); + return false; + } + TString urlBaseStr = URL_.PrintS(THttpURL::FlagNoFrag); - - clearReader(); + + clearReader(); Reader_ = new httpAgentReader(*this, urlBaseStr.c_str(), !PersistentConn_, !Digest_.empty()); - + if (Reader_->readHeader()) { - //mReader->getHeader()->Print(); + //mReader->getHeader()->Print(); if (getHeader()->http_status == HTTP_UNAUTHORIZED && step < 1 && Digest_.processHeader(getAuthHeader(), urlStr.c_str(), "GET")) { - //mReader->skipTheRest(); + //mReader->skipTheRest(); delete Reader_; Reader_ = nullptr; ErrCode_ = 0; - Disconnect(); - continue; - } - - return true; - } - Disconnect(); - clearReader(); - - return false; - } - + Disconnect(); + continue; + } + + return true; + } + Disconnect(); + clearReader(); + + return false; + } + ErrCode_ = HTTP_UNAUTHORIZED; - return false; -} - -/************************************************************/ -/************************************************************/ + return false; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/httpload.h b/library/cpp/http/fetch/httpload.h index 1441dd27b5..e22e4b809e 100644 --- a/library/cpp/http/fetch/httpload.h +++ b/library/cpp/http/fetch/httpload.h @@ -1,226 +1,226 @@ #pragma once - + #include "httpagent.h" #include "httpparser.h" #include "http_digest.h" -#include <util/system/compat.h> -#include <util/string/vector.h> +#include <util/system/compat.h> +#include <util/string/vector.h> #include <util/network/ip.h> #include <library/cpp/uri/http_url.h> #include <library/cpp/http/misc/httpcodes.h> - -/********************************************************/ -// Section 1: socket handlers -/********************************************************/ -// The following classes allows to adopt template scheme -// THttpAgent for work with socket by flexible -// object-style scheme. - -/********************************************************/ -// This class is used as a base one for flexible -// socket handling + +/********************************************************/ +// Section 1: socket handlers +/********************************************************/ +// The following classes allows to adopt template scheme +// THttpAgent for work with socket by flexible +// object-style scheme. + +/********************************************************/ +// This class is used as a base one for flexible +// socket handling class socketAbstractHandler { -public: - virtual bool Good() = 0; - +public: + virtual bool Good() = 0; + virtual int Connect(const TAddrList& addrs, TDuration Timeout) = 0; - - virtual void Disconnect() = 0; - - virtual void shutdown() = 0; - - virtual bool send(const char* message, ssize_t messlen) = 0; - - virtual bool peek() = 0; - - virtual ssize_t read(void* buffer, ssize_t buflen) = 0; - + + virtual void Disconnect() = 0; + + virtual void shutdown() = 0; + + virtual bool send(const char* message, ssize_t messlen) = 0; + + virtual bool peek() = 0; + + virtual ssize_t read(void* buffer, ssize_t buflen) = 0; + virtual ~socketAbstractHandler() { } - -protected: + +protected: socketAbstractHandler() { } -}; - -/********************************************************/ -// This class is used as a proxy between THttpAgent and -// socketAbstractHandler -// (it is used by template scheme, -// so it does not have virtual methods) +}; + +/********************************************************/ +// This class is used as a proxy between THttpAgent and +// socketAbstractHandler +// (it is used by template scheme, +// so it does not have virtual methods) class TSocketHandlerPtr { -protected: +protected: socketAbstractHandler* Handler_; - -public: + +public: TSocketHandlerPtr() : Handler_(nullptr) { } - + virtual ~TSocketHandlerPtr() { delete Handler_; } - + int Good() { return (Handler_ && Handler_->Good()); } - + int Connect(const TAddrList& addrs, TDuration Timeout) { return (Handler_) ? Handler_->Connect(addrs, Timeout) : 1; - } - + } + void Disconnect() { if (Handler_) Handler_->Disconnect(); - } - + } + void shutdown() { if (Handler_) Handler_->shutdown(); - } - + } + bool send(const char* message, ssize_t messlen) { return (Handler_) ? Handler_->send(message, messlen) : false; - } - + } + virtual bool peek() { return (Handler_) ? Handler_->peek() : false; - } - + } + virtual ssize_t read(void* buffer, ssize_t buflen) { return (Handler_) ? Handler_->read(buffer, buflen) : 0; - } - + } + void setHandler(socketAbstractHandler* handler) { if (Handler_) delete Handler_; Handler_ = handler; - } -}; - -/********************************************************/ -// Here is httpAgent that uses socketAbstractHandler class -// ant its derivatives + } +}; + +/********************************************************/ +// Here is httpAgent that uses socketAbstractHandler class +// ant its derivatives using httpSpecialAgent = THttpAgent<TSocketHandlerPtr>; - -/********************************************************/ -// Regular handler is used as implementation of -// socketAbstractHandler for work through HTTP protocol + +/********************************************************/ +// Regular handler is used as implementation of +// socketAbstractHandler for work through HTTP protocol class socketRegularHandler: public socketAbstractHandler { protected: TSimpleSocketHandler Socket_; - + public: socketRegularHandler() : Socket_() { } - + bool Good() override { return Socket_.Good(); - } - + } + int Connect(const TAddrList& addrs, TDuration Timeout) override { return Socket_.Connect(addrs, Timeout); - } - + } + void Disconnect() override { Socket_.Disconnect(); - } - + } + void shutdown() override { - //Do not block writing to socket - //There are servers that works in a bad way with this - //mSocket.shutdown(); - } - + //Do not block writing to socket + //There are servers that works in a bad way with this + //mSocket.shutdown(); + } + bool send(const char* message, ssize_t messlen) override { return Socket_.send(message, messlen); - } - + } + bool peek() override { return Socket_.peek(); - } - + } + ssize_t read(void* buffer, ssize_t buflen) override { return Socket_.read(buffer, buflen); - } -}; - -/********************************************************/ -// The base factory that allows to choose an appropriate -// socketAbstractHandler implementation by url schema + } +}; + +/********************************************************/ +// The base factory that allows to choose an appropriate +// socketAbstractHandler implementation by url schema class socketHandlerFactory { public: virtual ~socketHandlerFactory() { } - - //returns mHandler_HTTP for correct HTTP-based url + + //returns mHandler_HTTP for correct HTTP-based url virtual socketAbstractHandler* chooseHandler(const THttpURL& url); - - static socketHandlerFactory sInstance; -}; - -/********************************************************/ -// Section 2: the configurates tool to parse an HTTP-response -/********************************************************/ - + + static socketHandlerFactory sInstance; +}; + +/********************************************************/ +// Section 2: the configurates tool to parse an HTTP-response +/********************************************************/ + class httpAgentReader: public THttpParserGeneric<1> { protected: THttpAuthHeader Header_; httpSpecialAgent& Agent_; - + char* Buffer_; void* BufPtr_; int BufSize_; long BufRest_; - - void readBuf(); - + + void readBuf(); + bool step() { if (BufRest_ == 0) - readBuf(); - if (eof()) - return false; - return true; - } - + readBuf(); + if (eof()) + return false; + return true; + } + public: httpAgentReader(httpSpecialAgent& agent, const char* baseUrl, bool assumeConnectionClosed, bool use_auth = false, int bufSize = 0x1000); - - ~httpAgentReader(); - + + ~httpAgentReader(); + bool eof() { return BufRest_ < 0; - } - + } + int error() { return Header_.error; - } - + } + void setError(int errCode) { Header_.error = errCode; - } - + } + const THttpAuthHeader* getAuthHeader() { return &Header_; - } - + } + const THttpHeader* readHeader(); long readPortion(void*& buf); bool skipTheRest(); -}; - -/********************************************************/ -// Section 3: the main class -/********************************************************/ +}; + +/********************************************************/ +// Section 3: the main class +/********************************************************/ class httpLoadAgent: public httpSpecialAgent { protected: socketHandlerFactory& Factory_; @@ -232,76 +232,76 @@ protected: int ErrCode_; char* RealHost_; httpDigestHandler Digest_; - - void clearReader(); + + void clearReader(); bool doSetHost(const TAddrList& addrs); - bool doStartRequest(); - + bool doStartRequest(); + public: httpLoadAgent(bool handleAuthorization = false, socketHandlerFactory& factory = socketHandlerFactory::sInstance); - ~httpLoadAgent(); - - void setRealHost(const char* host); + ~httpLoadAgent(); + + void setRealHost(const char* host); void setIMS(const char* ifModifiedSince); - void addHeaderInstruction(const char* instr); - void dropHeaderInstructions(); - - bool startRequest(const char* url, + void addHeaderInstruction(const char* instr); + void dropHeaderInstructions(); + + bool startRequest(const char* url, const char* url_to_merge = nullptr, bool persistent = false, const TAddrList& addrs = TAddrList()); - + // deprecated v4-only bool startRequest(const char* url, const char* url_to_merge, bool persistent, ui32 ip); - bool startRequest(const THttpURL& url, + bool startRequest(const THttpURL& url, bool persistent = false, const TAddrList& addrs = TAddrList()); - + bool setHost(const char* host_url, const TAddrList& addrs = TAddrList()); - - bool startOneRequest(const char* local_url); - + + bool startOneRequest(const char* local_url); + const THttpAuthHeader* getAuthHeader() { if (Reader_ && Reader_->getAuthHeader()->use_auth) return Reader_->getAuthHeader(); return nullptr; - } - + } + const THttpHeader* getHeader() { if (Reader_) return Reader_->getAuthHeader(); return nullptr; - } - + } + const THttpURL& getURL() { return URL_; - } - + } + bool eof() { if (Reader_) return Reader_->eof(); - return true; - } - + return true; + } + int error() { if (ErrCode_) return ErrCode_; if (Reader_) return Reader_->error(); - return HTTP_BAD_URL; - } - + return HTTP_BAD_URL; + } + long readPortion(void*& buf) { if (Reader_) return Reader_->readPortion(buf); - return -1; - } -}; - -/********************************************************/ + return -1; + } +}; + +/********************************************************/ diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h index b666707038..769828e4ae 100644 --- a/library/cpp/http/fetch/httpparser.h +++ b/library/cpp/http/fetch/httpparser.h @@ -1,14 +1,14 @@ #pragma once - -#include "httpfsm.h" -#include "httpheader.h" - + +#include "httpfsm.h" +#include "httpheader.h" + #include <library/cpp/mime/types/mime.h> #include <util/system/yassert.h> #include <library/cpp/http/misc/httpcodes.h> template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20> -struct TFakeCheck { +struct TFakeCheck { bool Check(THttpHeader* /*header*/) { return false; } @@ -22,351 +22,351 @@ struct TFakeCheck { size_t GetMaxBodySize(THttpHeader*) { return bodymax; } -}; - -class THttpParserBase { -public: - enum States { - hp_error, - hp_eof, - hp_in_header, - hp_read_alive, - hp_read_closed, - hp_begin_chunk_header, - hp_chunk_header, - hp_read_chunk - }; - +}; + +class THttpParserBase { +public: + enum States { + hp_error, + hp_eof, + hp_in_header, + hp_read_alive, + hp_read_closed, + hp_begin_chunk_header, + hp_chunk_header, + hp_read_chunk + }; + States GetState() { return State; } - + void setAssumeConnectionClosed(int value) { - AssumeConnectionClosed = value; - } - + AssumeConnectionClosed = value; + } + THttpHeader* GetHttpHeader() const { return Header; } - -protected: - int CheckHeaders() { + +protected: + int CheckHeaders() { if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) { - Header->content_length = 0; - Header->transfer_chunked = 0; - } - if (Header->transfer_chunked < -1) { - Header->error = HTTP_BAD_ENCODING; - return 1; - } else if (Header->transfer_chunked == -1) { - Header->transfer_chunked = 0; - } - if (!Header->transfer_chunked && Header->content_length < -1) { - Header->error = HTTP_BAD_CONTENT_LENGTH; - return 1; - } + Header->content_length = 0; + Header->transfer_chunked = 0; + } + if (Header->transfer_chunked < -1) { + Header->error = HTTP_BAD_ENCODING; + return 1; + } else if (Header->transfer_chunked == -1) { + Header->transfer_chunked = 0; + } + if (!Header->transfer_chunked && Header->content_length < -1) { + Header->error = HTTP_BAD_CONTENT_LENGTH; + return 1; + } if (Header->http_status == HTTP_OK) { - if (Header->compression_method != HTTP_COMPRESSION_UNSET && - Header->compression_method != HTTP_COMPRESSION_IDENTITY && - Header->compression_method != HTTP_COMPRESSION_GZIP && + if (Header->compression_method != HTTP_COMPRESSION_UNSET && + Header->compression_method != HTTP_COMPRESSION_IDENTITY && + Header->compression_method != HTTP_COMPRESSION_GZIP && Header->compression_method != HTTP_COMPRESSION_DEFLATE) { - Header->error = HTTP_BAD_CONTENT_ENCODING; - return 1; - } - } - if (Header->connection_closed == -1) - Header->connection_closed = (Header->http_minor == 0 || - AssumeConnectionClosed); + Header->error = HTTP_BAD_CONTENT_ENCODING; + return 1; + } + } + if (Header->connection_closed == -1) + Header->connection_closed = (Header->http_minor == 0 || + AssumeConnectionClosed); if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) { - Header->error = HTTP_LENGTH_UNKNOWN; - return 1; - } - if (Header->http_time < 0) - Header->http_time = 0; - if (Header->mime_type < 0) - Header->mime_type = MIME_UNKNOWN; - return 0; - } - - THttpHeaderParser HeaderParser; - THttpChunkParser ChunkParser; - States State; - long ChunkSize; + Header->error = HTTP_LENGTH_UNKNOWN; + return 1; + } + if (Header->http_time < 0) + Header->http_time = 0; + if (Header->mime_type < 0) + Header->mime_type = MIME_UNKNOWN; + return 0; + } + + THttpHeaderParser HeaderParser; + THttpChunkParser ChunkParser; + States State; + long ChunkSize; THttpHeader* Header; - int AssumeConnectionClosed; - bool HeadRequest; -}; - + int AssumeConnectionClosed; + bool HeadRequest; +}; + template <int isReader, typename TCheck = TFakeCheck<>> -class THttpParserGeneric: public THttpParserBase, public TCheck { -protected: +class THttpParserGeneric: public THttpParserBase, public TCheck { +protected: long ParseGeneric(void*& buf, long& size) { - if (!size) { - switch (State) { - case hp_error: - case hp_eof: - break; - case hp_read_closed: - State = hp_eof; - break; - case hp_in_header: - Header->error = HTTP_HEADER_EOF; - State = hp_error; - break; - case hp_read_alive: - case hp_read_chunk: - if (HeadRequest) - State = hp_eof; - else { - Header->error = HTTP_MESSAGE_EOF; - State = hp_error; - } - break; - case hp_begin_chunk_header: - case hp_chunk_header: - if (HeadRequest) - State = hp_eof; - else { - Header->error = HTTP_CHUNK_EOF; - State = hp_error; - } - break; - } - return 0; - } - while (size) { - int ret; - - switch (State) { - case hp_error: - return 0; - - case hp_eof: - return 0; - - case hp_in_header: - if ((ret = HeaderParser.Execute(buf, size)) < 0) { - Header->error = HTTP_BAD_HEADER_STRING; - State = hp_error; - return 0; - } else if (ret == 2) { - Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1); - size -= long(HeaderParser.lastchar - (char*)buf + 1); - buf = HeaderParser.lastchar + 1; - State = CheckHeaders() ? hp_error + if (!size) { + switch (State) { + case hp_error: + case hp_eof: + break; + case hp_read_closed: + State = hp_eof; + break; + case hp_in_header: + Header->error = HTTP_HEADER_EOF; + State = hp_error; + break; + case hp_read_alive: + case hp_read_chunk: + if (HeadRequest) + State = hp_eof; + else { + Header->error = HTTP_MESSAGE_EOF; + State = hp_error; + } + break; + case hp_begin_chunk_header: + case hp_chunk_header: + if (HeadRequest) + State = hp_eof; + else { + Header->error = HTTP_CHUNK_EOF; + State = hp_error; + } + break; + } + return 0; + } + while (size) { + int ret; + + switch (State) { + case hp_error: + return 0; + + case hp_eof: + return 0; + + case hp_in_header: + if ((ret = HeaderParser.Execute(buf, size)) < 0) { + Header->error = HTTP_BAD_HEADER_STRING; + State = hp_error; + return 0; + } else if (ret == 2) { + Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1); + size -= long(HeaderParser.lastchar - (char*)buf + 1); + buf = HeaderParser.lastchar + 1; + State = CheckHeaders() ? hp_error : Header->transfer_chunked ? hp_begin_chunk_header : Header->content_length == 0 ? hp_eof : Header->content_length > 0 ? hp_read_alive : hp_read_closed; - if (State == hp_begin_chunk_header) { - // unget \n for chunk reader + if (State == hp_begin_chunk_header) { + // unget \n for chunk reader buf = (char*)buf - 1; - size++; - } - if (isReader) - return size; - } else { - Header->header_size += size; - size = 0; - } - break; - - case hp_read_alive: - Header->entity_size += size; + size++; + } + if (isReader) + return size; + } else { + Header->header_size += size; + size = 0; + } + break; + + case hp_read_alive: + Header->entity_size += size; if (Header->entity_size >= Header->content_length) { - State = hp_eof; - } + State = hp_eof; + } - TCheck::CheckDocPart(buf, size, Header); - if (isReader) - return size; - size = 0; - break; - - case hp_read_closed: - Header->entity_size += size; - TCheck::CheckDocPart(buf, size, Header); - if (isReader) - return size; - size = 0; - break; - - case hp_begin_chunk_header: - ChunkParser.Init(); - State = hp_chunk_header; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_read_closed: + Header->entity_size += size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_begin_chunk_header: + ChunkParser.Init(); + State = hp_chunk_header; [[fallthrough]]; - - case hp_chunk_header: - if ((ret = ChunkParser.Execute(buf, size)) < 0) { - Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK); - State = hp_error; - return 0; - } else if (ret == 2) { - Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1); - size -= long(ChunkParser.lastchar - (char*)buf + 1); - buf = ChunkParser.lastchar + 1; - ChunkSize = ChunkParser.chunk_length; + + case hp_chunk_header: + if ((ret = ChunkParser.Execute(buf, size)) < 0) { + Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK); + State = hp_error; + return 0; + } else if (ret == 2) { + Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1); + size -= long(ChunkParser.lastchar - (char*)buf + 1); + buf = ChunkParser.lastchar + 1; + ChunkSize = ChunkParser.chunk_length; Y_ASSERT(ChunkSize >= 0); - State = ChunkSize ? hp_read_chunk : hp_eof; - } else { - Header->entity_size += size; - size = 0; - } - break; - - case hp_read_chunk: - if (size >= ChunkSize) { - Header->entity_size += ChunkSize; - State = hp_begin_chunk_header; - TCheck::CheckDocPart(buf, ChunkSize, Header); - if (isReader) - return ChunkSize; - size -= ChunkSize; + State = ChunkSize ? hp_read_chunk : hp_eof; + } else { + Header->entity_size += size; + size = 0; + } + break; + + case hp_read_chunk: + if (size >= ChunkSize) { + Header->entity_size += ChunkSize; + State = hp_begin_chunk_header; + TCheck::CheckDocPart(buf, ChunkSize, Header); + if (isReader) + return ChunkSize; + size -= ChunkSize; buf = (char*)buf + ChunkSize; - } else { - Header->entity_size += size; - ChunkSize -= size; - TCheck::CheckDocPart(buf, size, Header); - if (isReader) - return size; - size = 0; - } + } else { + Header->entity_size += size; + ChunkSize -= size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + } break; - } - } - return size; - } -}; - + } + } + return size; + } +}; + template <class TCheck = TFakeCheck<>> -class THttpParser: public THttpParserGeneric<0, TCheck> { - typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG! -public: +class THttpParser: public THttpParserGeneric<0, TCheck> { + typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG! +public: void Init(THttpHeader* H, bool head_request = false) { - TBaseT::Header = H; - TBaseT::HeaderParser.Init(TBaseT::Header); - TBaseT::State = TBaseT::hp_in_header; - TBaseT::AssumeConnectionClosed = 0; - TBaseT::HeadRequest = head_request; - } - + TBaseT::Header = H; + TBaseT::HeaderParser.Init(TBaseT::Header); + TBaseT::State = TBaseT::hp_in_header; + TBaseT::AssumeConnectionClosed = 0; + TBaseT::HeadRequest = head_request; + } + void Parse(void* buf, long size) { TBaseT::ParseGeneric(buf, size); - } -}; - -class TMemoReader { -public: + } +}; + +class TMemoReader { +public: int Init(void* buf, long bufsize) { - Buf = buf; - Bufsize = bufsize; - return 0; - } + Buf = buf; + Bufsize = bufsize; + return 0; + } long Read(void*& buf) { Y_ASSERT(Bufsize >= 0); - if (!Bufsize) { - Bufsize = -1; - return 0; - } - buf = Buf; - long ret = Bufsize; - Bufsize = 0; - return ret; - } - -protected: - long Bufsize; + if (!Bufsize) { + Bufsize = -1; + return 0; + } + buf = Buf; + long ret = Bufsize; + Bufsize = 0; + return ret; + } + +protected: + long Bufsize; void* Buf; -}; - -template <class Reader> -class THttpReader: public THttpParserGeneric<1>, public Reader { - typedef THttpParserGeneric<1> TBaseT; +}; + +template <class Reader> +class THttpReader: public THttpParserGeneric<1>, public Reader { + typedef THttpParserGeneric<1> TBaseT; -public: +public: using TBaseT::AssumeConnectionClosed; - using TBaseT::Header; - using TBaseT::ParseGeneric; - using TBaseT::State; - + using TBaseT::Header; + using TBaseT::ParseGeneric; + using TBaseT::State; + int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) { - Header = H; - Eoferr = 1; - Size = 0; + Header = H; + Eoferr = 1; + Size = 0; AssumeConnectionClosed = assumeConnectionClosed; HeadRequest = headRequest; - return parsHeader ? ParseHeader() : SkipHeader(); - } - + return parsHeader ? ParseHeader() : SkipHeader(); + } + long Read(void*& buf) { - long Chunk; - do { - if (!Size) { - if (Eoferr != 1) - return Eoferr; - else if ((Size = (long)Reader::Read(Ptr)) < 0) { - Header->error = HTTP_CONNECTION_LOST; - return Eoferr = -1; - } - } + long Chunk; + do { + if (!Size) { + if (Eoferr != 1) + return Eoferr; + else if ((Size = (long)Reader::Read(Ptr)) < 0) { + Header->error = HTTP_CONNECTION_LOST; + return Eoferr = -1; + } + } Chunk = ParseGeneric(Ptr, Size); - buf = Ptr; - Ptr = (char*)Ptr + Chunk; - Size -= Chunk; + buf = Ptr; + Ptr = (char*)Ptr + Chunk; + Size -= Chunk; if (State == hp_eof) { Size = 0; - Eoferr = 0; + Eoferr = 0; } else if (State == hp_error) - return Eoferr = -1; - } while (!Chunk); - return Chunk; - } - -protected: - int ParseHeader() { - HeaderParser.Init(Header); - State = hp_in_header; - while (State == hp_in_header) { - if ((Size = (long)Reader::Read(Ptr)) < 0) - return Eoferr = -1; + return Eoferr = -1; + } while (!Chunk); + return Chunk; + } + +protected: + int ParseHeader() { + HeaderParser.Init(Header); + State = hp_in_header; + while (State == hp_in_header) { + if ((Size = (long)Reader::Read(Ptr)) < 0) + return Eoferr = -1; ParseGeneric(Ptr, Size); - } - if (State == hp_error) - return Eoferr = -1; - if (State == hp_eof) - Eoferr = 0; - return 0; - } - - int SkipHeader() { - long hdrsize = Header->header_size; - while (hdrsize) { - if ((Size = (long)Reader::Read(Ptr)) <= 0) - return Eoferr = -1; - if (Size >= hdrsize) { - Size -= hdrsize; - Ptr = (char*)Ptr + hdrsize; - break; - } - hdrsize -= Size; - } - State = Header->transfer_chunked ? hp_begin_chunk_header + } + if (State == hp_error) + return Eoferr = -1; + if (State == hp_eof) + Eoferr = 0; + return 0; + } + + int SkipHeader() { + long hdrsize = Header->header_size; + while (hdrsize) { + if ((Size = (long)Reader::Read(Ptr)) <= 0) + return Eoferr = -1; + if (Size >= hdrsize) { + Size -= hdrsize; + Ptr = (char*)Ptr + hdrsize; + break; + } + hdrsize -= Size; + } + State = Header->transfer_chunked ? hp_begin_chunk_header : Header->content_length == 0 ? hp_eof : Header->content_length > 0 ? hp_read_alive : hp_read_closed; - Header->entity_size = 0; - if (State == hp_eof) - Eoferr = 0; - else if (State == hp_begin_chunk_header) { - // unget \n for chunk reader - Ptr = (char*)Ptr - 1; - ++Size; - } - return 0; - } - + Header->entity_size = 0; + if (State == hp_eof) + Eoferr = 0; + else if (State == hp_begin_chunk_header) { + // unget \n for chunk reader + Ptr = (char*)Ptr - 1; + ++Size; + } + return 0; + } + void* Ptr; - long Size; + long Size; int Eoferr; -}; +}; diff --git a/library/cpp/http/fetch/httpzreader.h b/library/cpp/http/fetch/httpzreader.h index d951d21e9a..68eb00853d 100644 --- a/library/cpp/http/fetch/httpzreader.h +++ b/library/cpp/http/fetch/httpzreader.h @@ -1,55 +1,55 @@ #pragma once - + #include "httpheader.h" #include "httpparser.h" #include "exthttpcodes.h" - + #include <util/system/defaults.h> #include <util/generic/yexception.h> -#include <contrib/libs/zlib/zlib.h> - +#include <contrib/libs/zlib/zlib.h> + #include <errno.h> -#ifndef ENOTSUP +#ifndef ENOTSUP #define ENOTSUP 45 -#endif - -template <class Reader> -class TCompressedHttpReader: public THttpReader<Reader> { - typedef THttpReader<Reader> TBase; +#endif + +template <class Reader> +class TCompressedHttpReader: public THttpReader<Reader> { + typedef THttpReader<Reader> TBase; -public: +public: using TBase::AssumeConnectionClosed; - using TBase::Header; - using TBase::ParseGeneric; - using TBase::State; - + using TBase::Header; + using TBase::ParseGeneric; + using TBase::State; + static constexpr size_t DefaultBufSize = 64 << 10; static constexpr unsigned int DefaultWinSize = 15; - TCompressedHttpReader() - : CompressedInput(false) - , BufSize(0) - , CurContSize(0) - , MaxContSize(0) + TCompressedHttpReader() + : CompressedInput(false) + , BufSize(0) + , CurContSize(0) + , MaxContSize(0) , Buf(nullptr) - , ZErr(0) + , ZErr(0) , ConnectionClosed(0) , IgnoreTrailingGarbage(true) - { - memset(&Stream, 0, sizeof(Stream)); - } - - ~TCompressedHttpReader() { - ClearStream(); - - if (Buf) { - free(Buf); + { + memset(&Stream, 0, sizeof(Stream)); + } + + ~TCompressedHttpReader() { + ClearStream(); + + if (Buf) { + free(Buf); Buf = nullptr; - } - } - + } + } + void SetConnectionClosed(int cc) { ConnectionClosed = cc; } @@ -66,196 +66,196 @@ public: const unsigned int winSize = DefaultWinSize, bool headRequest = false) { - ZErr = 0; - CurContSize = 0; - MaxContSize = maxContSize; - + ZErr = 0; + CurContSize = 0; + MaxContSize = maxContSize; + int ret = TBase::Init(H, parsHeader, ConnectionClosed, headRequest); - if (ret) - return ret; - - ret = SetCompression(H->compression_method, bufSize, winSize); - return ret; - } - + if (ret) + return ret; + + ret = SetCompression(H->compression_method, bufSize, winSize); + return ret; + } + long Read(void*& buf) { - if (!CompressedInput) { - long res = TBase::Read(buf); - if (res > 0) { - CurContSize += (size_t)res; - if (CurContSize > MaxContSize) { - ZErr = E2BIG; - return -1; - } - } - return res; - } - - while (true) { - if (Stream.avail_in == 0) { + if (!CompressedInput) { + long res = TBase::Read(buf); + if (res > 0) { + CurContSize += (size_t)res; + if (CurContSize > MaxContSize) { + ZErr = E2BIG; + return -1; + } + } + return res; + } + + while (true) { + if (Stream.avail_in == 0) { void* tmpin = Stream.next_in; long res = TBase::Read(tmpin); Stream.next_in = (Bytef*)tmpin; if (res <= 0) return res; Stream.avail_in = (uInt)res; - } - - Stream.next_out = Buf; - Stream.avail_out = (uInt)BufSize; - buf = Buf; - - int err = inflate(&Stream, Z_SYNC_FLUSH); - + } + + Stream.next_out = Buf; + Stream.avail_out = (uInt)BufSize; + buf = Buf; + + int err = inflate(&Stream, Z_SYNC_FLUSH); + //Y_ASSERT(Stream.avail_in == 0); - - switch (err) { - case Z_OK: - // there is no data in next_out yet - if (BufSize == Stream.avail_out) - continue; + + switch (err) { + case Z_OK: + // there is no data in next_out yet + if (BufSize == Stream.avail_out) + continue; [[fallthrough]]; // don't break or return; continue with Z_STREAM_END case - - case Z_STREAM_END: - if (Stream.total_out > MaxContSize) { - ZErr = E2BIG; - return -1; - } + + case Z_STREAM_END: + if (Stream.total_out > MaxContSize) { + ZErr = E2BIG; + return -1; + } if (!IgnoreTrailingGarbage && BufSize == Stream.avail_out && Stream.avail_in > 0) { Header->error = EXT_HTTP_GZIPERROR; ZErr = EFAULT; Stream.msg = (char*)"trailing garbage"; return -1; } - return long(BufSize - Stream.avail_out); - - case Z_NEED_DICT: - case Z_DATA_ERROR: - Header->error = EXT_HTTP_GZIPERROR; - ZErr = EFAULT; - return -1; - - case Z_MEM_ERROR: - ZErr = ENOMEM; - return -1; - - default: - ZErr = EINVAL; - return -1; - } - } - - return -1; - } - + return long(BufSize - Stream.avail_out); + + case Z_NEED_DICT: + case Z_DATA_ERROR: + Header->error = EXT_HTTP_GZIPERROR; + ZErr = EFAULT; + return -1; + + case Z_MEM_ERROR: + ZErr = ENOMEM; + return -1; + + default: + ZErr = EINVAL; + return -1; + } + } + + return -1; + } + const char* ZMsg() const { - return Stream.msg; - } - - int ZError() const { - return ZErr; - } - - size_t GetCurContSize() const { - return CompressedInput ? Stream.total_out : CurContSize; - } - -protected: - int SetCompression(const int compression, const size_t bufSize, - const unsigned int winSize) { - ClearStream(); - - int winsize = winSize; - switch ((enum HTTP_COMPRESSION)compression) { - case HTTP_COMPRESSION_UNSET: - case HTTP_COMPRESSION_IDENTITY: - CompressedInput = false; - return 0; - case HTTP_COMPRESSION_GZIP: - CompressedInput = true; + return Stream.msg; + } + + int ZError() const { + return ZErr; + } + + size_t GetCurContSize() const { + return CompressedInput ? Stream.total_out : CurContSize; + } + +protected: + int SetCompression(const int compression, const size_t bufSize, + const unsigned int winSize) { + ClearStream(); + + int winsize = winSize; + switch ((enum HTTP_COMPRESSION)compression) { + case HTTP_COMPRESSION_UNSET: + case HTTP_COMPRESSION_IDENTITY: + CompressedInput = false; + return 0; + case HTTP_COMPRESSION_GZIP: + CompressedInput = true; winsize += 16; // 16 indicates gzip, see zlib.h - break; - case HTTP_COMPRESSION_DEFLATE: - CompressedInput = true; - winsize = -winsize; // negative indicates raw deflate stream, see zlib.h - break; - case HTTP_COMPRESSION_COMPRESS: - case HTTP_COMPRESSION_ERROR: - default: - CompressedInput = false; - ZErr = ENOTSUP; - return -1; - } - - if (bufSize != BufSize) { - if (Buf) - free(Buf); - Buf = (ui8*)malloc(bufSize); - if (!Buf) { - ZErr = ENOMEM; - return -1; - } - BufSize = bufSize; - } - - int err = inflateInit2(&Stream, winsize); - switch (err) { - case Z_OK: - Stream.total_in = 0; - Stream.total_out = 0; - Stream.avail_in = 0; - return 0; - - case Z_DATA_ERROR: // never happens, see zlib.h - CompressedInput = false; - ZErr = EFAULT; - return -1; - - case Z_MEM_ERROR: - CompressedInput = false; - ZErr = ENOMEM; - return -1; - - default: - CompressedInput = false; - ZErr = EINVAL; - return -1; - } - } - - void ClearStream() { - if (CompressedInput) { - inflateEnd(&Stream); - CompressedInput = false; - } - } - - z_stream Stream; - bool CompressedInput; - size_t BufSize; - size_t CurContSize, MaxContSize; - ui8* Buf; - int ZErr; + break; + case HTTP_COMPRESSION_DEFLATE: + CompressedInput = true; + winsize = -winsize; // negative indicates raw deflate stream, see zlib.h + break; + case HTTP_COMPRESSION_COMPRESS: + case HTTP_COMPRESSION_ERROR: + default: + CompressedInput = false; + ZErr = ENOTSUP; + return -1; + } + + if (bufSize != BufSize) { + if (Buf) + free(Buf); + Buf = (ui8*)malloc(bufSize); + if (!Buf) { + ZErr = ENOMEM; + return -1; + } + BufSize = bufSize; + } + + int err = inflateInit2(&Stream, winsize); + switch (err) { + case Z_OK: + Stream.total_in = 0; + Stream.total_out = 0; + Stream.avail_in = 0; + return 0; + + case Z_DATA_ERROR: // never happens, see zlib.h + CompressedInput = false; + ZErr = EFAULT; + return -1; + + case Z_MEM_ERROR: + CompressedInput = false; + ZErr = ENOMEM; + return -1; + + default: + CompressedInput = false; + ZErr = EINVAL; + return -1; + } + } + + void ClearStream() { + if (CompressedInput) { + inflateEnd(&Stream); + CompressedInput = false; + } + } + + z_stream Stream; + bool CompressedInput; + size_t BufSize; + size_t CurContSize, MaxContSize; + ui8* Buf; + int ZErr; int ConnectionClosed; bool IgnoreTrailingGarbage; -}; - +}; + class zlib_exception: public yexception { -}; - -template <class Reader> -class SCompressedHttpReader: public TCompressedHttpReader<Reader> { - typedef TCompressedHttpReader<Reader> TBase; - -public: - using TBase::ZError; - using TBase::ZMsg; - - SCompressedHttpReader() +}; + +template <class Reader> +class SCompressedHttpReader: public TCompressedHttpReader<Reader> { + typedef TCompressedHttpReader<Reader> TBase; + +public: + using TBase::ZError; + using TBase::ZMsg; + + SCompressedHttpReader() : TBase() { } - + int Init( THttpHeader* H, int parsHeader, @@ -265,31 +265,31 @@ public: bool headRequest = false) { int ret = TBase::Init(H, parsHeader, maxContSize, bufSize, winSize, headRequest); - return (int)HandleRetValue((long)ret); - } - + return (int)HandleRetValue((long)ret); + } + long Read(void*& buf) { - long ret = TBase::Read(buf); - return HandleRetValue(ret); - } - -protected: - long HandleRetValue(long ret) { - switch (ZError()) { - case 0: - return ret; - case ENOMEM: + long ret = TBase::Read(buf); + return HandleRetValue(ret); + } + +protected: + long HandleRetValue(long ret) { + switch (ZError()) { + case 0: + return ret; + case ENOMEM: ythrow yexception() << "SCompressedHttpReader: not enough memory"; - case EINVAL: + case EINVAL: ythrow yexception() << "SCompressedHttpReader: zlib error: " << ZMsg(); - case ENOTSUP: + case ENOTSUP: ythrow yexception() << "SCompressedHttpReader: unsupported compression method"; - case EFAULT: + case EFAULT: ythrow zlib_exception() << "SCompressedHttpReader: " << ZMsg(); - case E2BIG: + case E2BIG: ythrow zlib_exception() << "SCompressedHttpReader: Content exceeds maximum length"; - default: + default: ythrow yexception() << "SCompressedHttpReader: unknown error"; - } - } -}; + } + } +}; diff --git a/library/cpp/http/fetch/ya.make b/library/cpp/http/fetch/ya.make index aa8b073a8c..7737127463 100644 --- a/library/cpp/http/fetch/ya.make +++ b/library/cpp/http/fetch/ya.make @@ -1,5 +1,5 @@ LIBRARY() - + OWNER( g:zora ) @@ -14,25 +14,25 @@ PEERDIR( library/cpp/uri ) -SRCS( - http_digest.cpp - http_socket.cpp +SRCS( + http_digest.cpp + http_socket.cpp httpheader.cpp - httpload.cpp - exthttpcodes.cpp + httpload.cpp + exthttpcodes.cpp httpfsm.rl6 - httpagent.h - httpfetcher.h - httpheader.h - httpparser.h - httpzreader.h + httpagent.h + httpfetcher.h + httpheader.h + httpparser.h + httpzreader.h sockhandler.h -) - +) + GENERATE_ENUM_SERIALIZATION(httpheader.h) SET(RAGEL6_FLAGS -CF1) -END() +END() RECURSE_FOR_TESTS(ut) |