aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/http/fetch
diff options
context:
space:
mode:
authorleo <leo@yandex-team.ru>2022-02-10 16:46:40 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:40 +0300
commit99609724f661f7e21d1cb08e8d80e87c3632fdb3 (patch)
tree49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/http/fetch
parent980edcd3304699edf9d4e4d6a656e585028e2a72 (diff)
downloadydb-99609724f661f7e21d1cb08e8d80e87c3632fdb3.tar.gz
Restoring authorship annotation for <leo@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
-rw-r--r--library/cpp/http/fetch/exthttpcodes.cpp44
-rw-r--r--library/cpp/http/fetch/exthttpcodes.h42
-rw-r--r--library/cpp/http/fetch/http_digest.cpp238
-rw-r--r--library/cpp/http/fetch/http_digest.h32
-rw-r--r--library/cpp/http/fetch/http_socket.cpp242
-rw-r--r--library/cpp/http/fetch/httpfetcher.h212
-rw-r--r--library/cpp/http/fetch/httpfsm.h74
-rw-r--r--library/cpp/http/fetch/httpfsm.rl6434
-rw-r--r--library/cpp/http/fetch/httpheader.h84
-rw-r--r--library/cpp/http/fetch/httpload.cpp376
-rw-r--r--library/cpp/http/fetch/httpload.h316
-rw-r--r--library/cpp/http/fetch/httpparser.h606
-rw-r--r--library/cpp/http/fetch/httpzreader.h440
-rw-r--r--library/cpp/http/fetch/ya.make28
14 files changed, 1584 insertions, 1584 deletions
diff --git a/library/cpp/http/fetch/exthttpcodes.cpp b/library/cpp/http/fetch/exthttpcodes.cpp
index ac0d3b359d..acc05650c8 100644
--- a/library/cpp/http/fetch/exthttpcodes.cpp
+++ b/library/cpp/http/fetch/exthttpcodes.cpp
@@ -1,18 +1,18 @@
#include "exthttpcodes.h"
-#include <cstring>
-
+#include <cstring>
+
const ui16 CrazyServer = ShouldDelete | MarkSuspect;
-
+
struct http_flag {
ui16 http;
ui16 flag;
};
-static http_flag HTTP_FLAG[] = {
+static http_flag HTTP_FLAG[] = {
{HTTP_CONTINUE, MarkSuspect}, // 100
{HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101
{HTTP_PROCESSING, CrazyServer}, // 102
-
+
{HTTP_OK, ShouldReindex}, // 200
{HTTP_CREATED, CrazyServer}, // 201
{HTTP_ACCEPTED, ShouldDelete}, // 202
@@ -23,7 +23,7 @@ static http_flag HTTP_FLAG[] = {
{HTTP_MULTI_STATUS, CrazyServer}, // 207
{HTTP_ALREADY_REPORTED, CrazyServer}, // 208
{HTTP_IM_USED, CrazyServer}, // 226
-
+
{HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete}, // 300
{HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir}, // 301
{HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir}, // 302
@@ -32,7 +32,7 @@ static http_flag HTTP_FLAG[] = {
{HTTP_USE_PROXY, ShouldDelete}, // 305
{HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307
{HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308
-
+
{HTTP_BAD_REQUEST, CrazyServer}, // 400
{HTTP_UNAUTHORIZED, ShouldDelete}, // 401
{HTTP_PAYMENT_REQUIRED, ShouldDelete}, // 402
@@ -53,7 +53,7 @@ static http_flag HTTP_FLAG[] = {
{HTTP_EXPECTATION_FAILED, ShouldDelete}, // 417
{HTTP_I_AM_A_TEAPOT, CrazyServer}, // 418
{HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete}, // 419
-
+
{HTTP_MISDIRECTED_REQUEST, CrazyServer}, // 421
{HTTP_UNPROCESSABLE_ENTITY, CrazyServer}, // 422
{HTTP_LOCKED, ShouldDelete}, // 423
@@ -62,7 +62,7 @@ static http_flag HTTP_FLAG[] = {
{HTTP_PRECONDITION_REQUIRED, ShouldDelete}, // 428
{HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429
{HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete}, // 451
-
+
{HTTP_INTERNAL_SERVER_ERROR, MarkSuspect}, // 500
{HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect}, // 501
{HTTP_BAD_GATEWAY, MarkSuspect}, // 502
@@ -116,7 +116,7 @@ static http_flag HTTP_FLAG[] = {
{HTTP_FETCHER_BAD_RESPONSE, 0}, // 1040
{HTTP_FETCHER_MB_ERROR, 0}, // 1041
{HTTP_SSL_CERT_ERROR, 0}, // 1042
-
+
// Custom (replace HTTP 200/304)
{EXT_HTTP_MIRRMOVE, 0}, // 2000
{EXT_HTTP_MANUAL_DELETE, ShouldDelete}, // 2001
@@ -142,34 +142,34 @@ static http_flag HTTP_FLAG[] = {
{EXT_HTTP_EMPTY_RESPONSE, ShouldDelete}, // 2024
{EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025
{0, 0}};
-
+
static ui16* prepare_flags(http_flag* arg) {
- static ui16 flags[EXT_HTTP_CODE_MAX];
+ static ui16 flags[EXT_HTTP_CODE_MAX];
http_flag* ptr;
- size_t i;
-
+ size_t i;
+
// устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше
for (i = 0; i < EXT_HTTP_CODE_MAX; ++i)
- flags[i] = CrazyServer;
-
+ flags[i] = CrazyServer;
+
// устанавливаем флаги для перечисленных кодов
for (ptr = arg; ptr->http; ++ptr)
flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
-
+
// для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их
// всем кодам не перечисленным в таблице выше
for (size_t group = 0; group < 1000; group += 100)
for (size_t j = group + 1; j < group + 100; ++j)
flags[j] = flags[group];
-
+
// предыдущий цикл затер некоторые флаги перечисленные в таблице выше
// восстанавливаем их
for (ptr = arg; ptr->http; ++ptr)
flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
-
- return flags;
-}
-
+
+ return flags;
+}
+
ui16* http2status = prepare_flags(HTTP_FLAG);
TStringBuf ExtHttpCodeStr(int code) noexcept {
diff --git a/library/cpp/http/fetch/exthttpcodes.h b/library/cpp/http/fetch/exthttpcodes.h
index 88bfe8d829..6b525052cd 100644
--- a/library/cpp/http/fetch/exthttpcodes.h
+++ b/library/cpp/http/fetch/exthttpcodes.h
@@ -1,9 +1,9 @@
#pragma once
-
-#include <util/system/defaults.h>
+
+#include <util/system/defaults.h>
#include <library/cpp/http/misc/httpcodes.h>
-
-enum ExtHttpCodes {
+
+enum ExtHttpCodes {
// Custom
HTTP_EXTENDED = 1000,
HTTP_BAD_RESPONSE_HEADER = 1000,
@@ -50,8 +50,8 @@ enum ExtHttpCodes {
HTTP_FETCHER_MB_ERROR = 1041,
HTTP_SSL_CERT_ERROR = 1042,
HTTP_PROXY_REQUEST_CANCELED = 1051,
-
- // Custom (replace HTTP 200/304)
+
+ // Custom (replace HTTP 200/304)
EXT_HTTP_EXT_SUCCESS_BEGIN = 2000, // to check if code variable is in success interval
EXT_HTTP_MIRRMOVE = 2000,
EXT_HTTP_MANUAL_DELETE = 2001,
@@ -104,22 +104,22 @@ enum ExtHttpCodes {
EXT_HTTP_WRONGMULTILANG = 3023,
EXT_HTTP_SOFTMIRRORS = 3024,
EXT_HTTP_BIGLEVEL = 3025,
-
- // fast robot codes
-
+
+ // fast robot codes
+
EXT_HTTP_FASTHOPS = 4000,
EXT_HTTP_NODOC = 4001,
EXT_HTTP_MAX
-};
-
-enum HttpFlags {
- // connection
- ShouldDisconnect = 1,
+};
+
+enum HttpFlags {
+ // connection
+ ShouldDisconnect = 1,
ShouldRetry = 2,
// UNUSED 4
-
- // indexer
+
+ // indexer
ShouldReindex = 8,
ShouldDelete = 16,
CheckLocation = 32,
@@ -129,13 +129,13 @@ enum HttpFlags {
// UNUSED 512
MoveRedir = 1024,
CanBeFake = 2048,
-};
-
+};
+
const size_t EXT_HTTP_CODE_MAX = 1 << 12;
-
-static inline int Http2Status(int code) {
+
+static inline int Http2Status(int code) {
extern ui16* http2status;
return http2status[code & (EXT_HTTP_CODE_MAX - 1)];
-}
+}
TStringBuf ExtHttpCodeStr(int code) noexcept;
diff --git a/library/cpp/http/fetch/http_digest.cpp b/library/cpp/http/fetch/http_digest.cpp
index 1c8bc6f449..1eaa02b7f2 100644
--- a/library/cpp/http/fetch/http_digest.cpp
+++ b/library/cpp/http/fetch/http_digest.cpp
@@ -3,204 +3,204 @@
#include <library/cpp/digest/md5/md5.h>
#include <util/stream/output.h>
#include <util/stream/str.h>
-
-/************************************************************/
-/************************************************************/
-static const char* WWW_PREFIX = "Authorization: Digest ";
-
-/************************************************************/
+
+/************************************************************/
+/************************************************************/
+static const char* WWW_PREFIX = "Authorization: Digest ";
+
+/************************************************************/
httpDigestHandler::httpDigestHandler()
: User_(nullptr)
, Password_(nullptr)
, Nonce_(nullptr)
, NonceCount_(0)
, HeaderInstruction_(nullptr)
-{
-}
-
-/************************************************************/
+{
+}
+
+/************************************************************/
httpDigestHandler::~httpDigestHandler() {
- clear();
-}
-
-/************************************************************/
+ clear();
+}
+
+/************************************************************/
void httpDigestHandler::clear() {
free(Nonce_);
free(HeaderInstruction_);
User_ = Password_ = nullptr;
Nonce_ = HeaderInstruction_ = nullptr;
NonceCount_ = 0;
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpDigestHandler::setAuthorization(const char* user, const char* password) {
- clear();
+ clear();
if (user && password) {
User_ = user;
Password_ = password;
- }
-}
-
-/************************************************************/
+ }
+}
+
+/************************************************************/
const char* httpDigestHandler::getHeaderInstruction() const {
return HeaderInstruction_;
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpDigestHandler::generateCNonce(char* outCNonce) {
- if (!*outCNonce)
+ if (!*outCNonce)
sprintf(outCNonce, "%ld", (long)time(nullptr));
-}
-
-/************************************************************/
+}
+
+/************************************************************/
inline void addMD5(MD5& ctx, const char* value) {
- ctx.Update((const unsigned char*)(value), strlen(value));
-}
-
+ ctx.Update((const unsigned char*)(value), strlen(value));
+}
+
inline void addMD5(MD5& ctx, const char* value, int len) {
- ctx.Update((const unsigned char*)(value), len);
-}
-
+ ctx.Update((const unsigned char*)(value), len);
+}
+
inline void addMD5Sep(MD5& ctx) {
- addMD5(ctx, ":", 1);
-}
-
-/************************************************************/
-/* calculate H(A1) as per spec */
+ addMD5(ctx, ":", 1);
+}
+
+/************************************************************/
+/* calculate H(A1) as per spec */
void httpDigestHandler::digestCalcHA1(const THttpAuthHeader& hd,
char* outSessionKey,
char* outCNonce) {
- MD5 ctx;
- ctx.Init();
+ MD5 ctx;
+ ctx.Init();
addMD5(ctx, User_);
addMD5Sep(ctx);
addMD5(ctx, hd.realm);
addMD5Sep(ctx);
addMD5(ctx, Password_);
-
+
if (hd.algorithm == 1) { //MD5-sess
- unsigned char digest[16];
- ctx.Final(digest);
-
- generateCNonce(outCNonce);
-
- ctx.Init();
- ctx.Update(digest, 16);
+ unsigned char digest[16];
+ ctx.Final(digest);
+
+ generateCNonce(outCNonce);
+
+ ctx.Init();
+ ctx.Update(digest, 16);
addMD5Sep(ctx);
addMD5(ctx, hd.nonce);
addMD5Sep(ctx);
addMD5(ctx, outCNonce);
- ctx.End(outSessionKey);
- }
-
+ ctx.End(outSessionKey);
+ }
+
ctx.End(outSessionKey);
-};
-
-/************************************************************/
-/* calculate request-digest/response-digest as per HTTP Digest spec */
+};
+
+/************************************************************/
+/* calculate request-digest/response-digest as per HTTP Digest spec */
void httpDigestHandler::digestCalcResponse(const THttpAuthHeader& hd,
const char* path,
const char* method,
const char* nonceCount,
char* outResponse,
char* outCNonce) {
- char HA1[33];
- digestCalcHA1(hd, HA1, outCNonce);
-
- char HA2[33];
- MD5 ctx;
- ctx.Init();
+ char HA1[33];
+ digestCalcHA1(hd, HA1, outCNonce);
+
+ char HA2[33];
+ MD5 ctx;
+ ctx.Init();
addMD5(ctx, method);
addMD5Sep(ctx);
addMD5(ctx, path);
- //ignore auth-int
- ctx.End(HA2);
-
- ctx.Init();
+ //ignore auth-int
+ ctx.End(HA2);
+
+ ctx.Init();
addMD5(ctx, HA1, 32);
addMD5Sep(ctx);
addMD5(ctx, Nonce_);
addMD5Sep(ctx);
-
+
if (hd.qop_auth) {
- if (!*outCNonce)
- generateCNonce(outCNonce);
-
+ if (!*outCNonce)
+ generateCNonce(outCNonce);
+
addMD5(ctx, nonceCount, 8);
addMD5Sep(ctx);
addMD5(ctx, outCNonce);
addMD5Sep(ctx);
addMD5(ctx, "auth", 4);
addMD5Sep(ctx);
- }
+ }
addMD5(ctx, HA2, 32);
- ctx.End(outResponse);
-}
-
-/************************************************************/
+ ctx.End(outResponse);
+}
+
+/************************************************************/
bool httpDigestHandler::processHeader(const THttpAuthHeader* header,
const char* path,
const char* method,
const char* cnonce) {
if (!User_ || !header || !header->use_auth || !header->realm || !header->nonce)
- return false;
-
+ return false;
+
if (Nonce_) {
if (strcmp(Nonce_, header->nonce)) {
free(Nonce_);
Nonce_ = nullptr;
NonceCount_ = 0;
- }
- }
+ }
+ }
if (!Nonce_) {
Nonce_ = strdup(header->nonce);
NonceCount_ = 0;
- }
+ }
free(HeaderInstruction_);
HeaderInstruction_ = nullptr;
NonceCount_++;
-
- char nonceCount[20];
+
+ char nonceCount[20];
sprintf(nonceCount, "%08d", NonceCount_);
-
- char CNonce[50];
- if (cnonce)
- strcpy(CNonce, cnonce);
- else
+
+ char CNonce[50];
+ if (cnonce)
+ strcpy(CNonce, cnonce);
+ else
CNonce[0] = 0;
-
- char response[33];
+
+ char response[33];
digestCalcResponse(*header, path, method, nonceCount, response, CNonce);
-
- //digest-response = 1#( username | realm | nonce | digest-uri
- // | response | [ algorithm ] | [cnonce] |
- // [opaque] | [message-qop] |
- // [nonce-count] | [auth-param] )
-
- TStringStream out;
+
+ //digest-response = 1#( username | realm | nonce | digest-uri
+ // | response | [ algorithm ] | [cnonce] |
+ // [opaque] | [message-qop] |
+ // [nonce-count] | [auth-param] )
+
+ TStringStream out;
out << WWW_PREFIX << "username=\"" << User_ << "\"";
- out << ", realm=\"" << header->realm << "\"";
- out << ", nonce=\"" << header->nonce << "\"";
- out << ", uri=\"" << path << "\"";
+ out << ", realm=\"" << header->realm << "\"";
+ out << ", nonce=\"" << header->nonce << "\"";
+ out << ", uri=\"" << path << "\"";
if (header->algorithm == 1)
- out << ", algorithm=MD5-sess";
- else
- out << ", algorithm=MD5";
- if (header->qop_auth)
- out << ", qop=auth";
- out << ", nc=" << nonceCount;
- if (CNonce[0])
- out << ", cnonce=\"" << CNonce << "\"";
- out << ", response=\"" << response << "\"";
- if (header->opaque)
- out << ", opaque=\"" << header->opaque << "\"";
- out << "\r\n";
-
+ out << ", algorithm=MD5-sess";
+ else
+ out << ", algorithm=MD5";
+ if (header->qop_auth)
+ out << ", qop=auth";
+ out << ", nc=" << nonceCount;
+ if (CNonce[0])
+ out << ", cnonce=\"" << CNonce << "\"";
+ out << ", response=\"" << response << "\"";
+ if (header->opaque)
+ out << ", opaque=\"" << header->opaque << "\"";
+ out << "\r\n";
+
TString s_out = out.Str();
HeaderInstruction_ = strdup(s_out.c_str());
-
- return true;
-}
-
-/************************************************************/
-/************************************************************/
+
+ return true;
+}
+
+/************************************************************/
+/************************************************************/
diff --git a/library/cpp/http/fetch/http_digest.h b/library/cpp/http/fetch/http_digest.h
index 018107c2e4..3b1872d70b 100644
--- a/library/cpp/http/fetch/http_digest.h
+++ b/library/cpp/http/fetch/http_digest.h
@@ -1,10 +1,10 @@
#pragma once
-
+
#include "httpheader.h"
-#include <util/system/compat.h>
+#include <util/system/compat.h>
#include <library/cpp/http/misc/httpcodes.h>
-
+
class httpDigestHandler {
protected:
const char* User_;
@@ -12,36 +12,36 @@ protected:
char* Nonce_;
int NonceCount_;
char* HeaderInstruction_;
-
- void clear();
-
+
+ void clear();
+
void generateCNonce(char* outCNonce);
-
+
void digestCalcHA1(const THttpAuthHeader& hd,
char* outSessionKey,
char* outCNonce);
-
+
void digestCalcResponse(const THttpAuthHeader& hd,
const char* method,
const char* path,
const char* nonceCount,
char* outResponse,
char* outCNonce);
-
+
public:
- httpDigestHandler();
- ~httpDigestHandler();
-
+ httpDigestHandler();
+ ~httpDigestHandler();
+
void setAuthorization(const char* user,
const char* password);
bool processHeader(const THttpAuthHeader* header,
const char* path,
const char* method,
const char* cnonce = nullptr);
-
+
bool empty() const {
return (!User_);
- }
-
+ }
+
const char* getHeaderInstruction() const;
-};
+};
diff --git a/library/cpp/http/fetch/http_socket.cpp b/library/cpp/http/fetch/http_socket.cpp
index 870d927489..1524ef04a8 100644
--- a/library/cpp/http/fetch/http_socket.cpp
+++ b/library/cpp/http/fetch/http_socket.cpp
@@ -1,30 +1,30 @@
-#include "httpload.h"
-#include "http_digest.h"
-
-/************************************************************/
-
-#ifdef USE_GNUTLS
-
-#include <gcrypt.h>
-#include <gnutls/gnutls.h>
+#include "httpload.h"
+#include "http_digest.h"
+
+/************************************************************/
+
+#ifdef USE_GNUTLS
+
+#include <gcrypt.h>
+#include <gnutls/gnutls.h>
#include <util/network/init.h>
-#include <util/network/socket.h>
-#include <util/system/mutex.h>
-
-/********************************************************/
-// HTTPS handler is used as implementation of
-// socketAbstractHandler for work through HTTPS protocol
-
+#include <util/network/socket.h>
+#include <util/system/mutex.h>
+
+/********************************************************/
+// HTTPS handler is used as implementation of
+// socketAbstractHandler for work through HTTPS protocol
+
class socketSecureHandler: public socketRegularHandler {
protected:
bool IsValid_;
gnutls_session Session_;
gnutls_certificate_credentials Credits_;
-
+
public:
socketSecureHandler();
virtual ~socketSecureHandler();
-
+
virtual bool Good();
virtual int Connect(const TAddrList& addrs, TDuration Timeout);
virtual void Disconnect();
@@ -32,175 +32,175 @@ public:
virtual bool send(const char* message, ssize_t messlen);
virtual bool peek();
virtual ssize_t read(void* buffer, ssize_t buflen);
-};
-
-/********************************************************/
-/********************************************************/
+};
+
+/********************************************************/
+/********************************************************/
static int gcry_pthread_mutex_init(void** priv) {
- int err = 0;
-
- try {
+ int err = 0;
+
+ try {
TMutex* lock = new TMutex;
- *priv = lock;
+ *priv = lock;
} catch (...) {
- err = -1;
- }
-
- return err;
-}
-
+ err = -1;
+ }
+
+ return err;
+}
+
static int gcry_pthread_mutex_destroy(void** lock) {
delete static_cast<TMutex*>(*lock);
- return 0;
-}
-
+ return 0;
+}
+
static int gcry_pthread_mutex_lock(void** lock) {
static_cast<TMutex*>(*lock)->Acquire();
-
- return 0;
-}
-
+
+ return 0;
+}
+
static int gcry_pthread_mutex_unlock(void** lock) {
static_cast<TMutex*>(*lock)->Release();
-
- return 0;
-}
-
-static struct gcry_thread_cbs gcry_threads_pthread =
+
+ return 0;
+}
+
+static struct gcry_thread_cbs gcry_threads_pthread =
{
GCRY_THREAD_OPTION_PTHREAD, NULL,
gcry_pthread_mutex_init, gcry_pthread_mutex_destroy,
gcry_pthread_mutex_lock, gcry_pthread_mutex_unlock,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL};
-
-/********************************************************/
+
+/********************************************************/
struct https_initor {
https_initor() {
gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread);
- gnutls_global_init();
+ gnutls_global_init();
InitNetworkSubSystem();
- }
-
+ }
+
~https_initor() {
- gnutls_global_deinit();
- }
-};
-
-static https_initor _initor;
-
-/********************************************************/
+ gnutls_global_deinit();
+ }
+};
+
+static https_initor _initor;
+
+/********************************************************/
socketSecureHandler::socketSecureHandler()
: socketRegularHandler()
, IsValid_(false)
, Session_()
, Credits_()
-{
-}
-
-/********************************************************/
+{
+}
+
+/********************************************************/
socketSecureHandler::~socketSecureHandler() {
if (IsValid_)
- Disconnect();
-}
-
-/********************************************************/
+ Disconnect();
+}
+
+/********************************************************/
bool socketSecureHandler::Good() {
return Socket_.Good() && IsValid_;
-}
-
-/********************************************************/
+}
+
+/********************************************************/
int socketSecureHandler::Connect(const TAddrList& addrs, TDuration Timeout) {
IsValid_ = false;
-
+
int ret = socketRegularHandler::Connect(addrs, Timeout);
- if (ret)
- return ret;
-
+ if (ret)
+ return ret;
+
gnutls_certificate_allocate_credentials(&Credits_);
gnutls_init(&Session_, GNUTLS_CLIENT);
gnutls_set_default_priority(Session_);
gnutls_credentials_set(Session_, GNUTLS_CRD_CERTIFICATE, Credits_);
-
+
SOCKET fd = Socket_;
gnutls_transport_set_ptr(Session_, (gnutls_transport_ptr)fd);
-
+
ret = gnutls_handshake(Session_);
-
+
if (ret < 0) {
- fprintf(stderr, "*** Handshake failed\n");
- gnutls_perror(ret);
-
+ fprintf(stderr, "*** Handshake failed\n");
+ gnutls_perror(ret);
+
gnutls_deinit(Session_);
if (Credits_) {
gnutls_certificate_free_credentials(Credits_);
Credits_ = 0;
- }
- return 1;
- }
-
+ }
+ return 1;
+ }
+
IsValid_ = true;
return !IsValid_;
-}
-
-/********************************************************/
+}
+
+/********************************************************/
void socketSecureHandler::Disconnect() {
if (IsValid_) {
gnutls_bye(Session_, GNUTLS_SHUT_RDWR);
IsValid_ = false;
gnutls_deinit(Session_);
- }
-
+ }
+
if (Credits_) {
gnutls_certificate_free_credentials(Credits_);
Credits_ = 0;
- }
-
- socketRegularHandler::Disconnect();
-}
-
-/********************************************************/
+ }
+
+ socketRegularHandler::Disconnect();
+}
+
+/********************************************************/
void socketSecureHandler::shutdown() {
-}
-
-/********************************************************/
+}
+
+/********************************************************/
bool socketSecureHandler::send(const char* message, ssize_t messlen) {
if (!IsValid_)
- return false;
+ return false;
ssize_t rv = gnutls_record_send(Session_, message, messlen);
- return rv >= 0;
-}
-
-/********************************************************/
+ return rv >= 0;
+}
+
+/********************************************************/
bool socketSecureHandler::peek() {
- //ssize_t rv = gnutls_record_check_pending(mSession);
- //return rv>0;
- return true;
-}
-
-/********************************************************/
+ //ssize_t rv = gnutls_record_check_pending(mSession);
+ //return rv>0;
+ return true;
+}
+
+/********************************************************/
ssize_t socketSecureHandler::read(void* buffer, ssize_t buflen) {
if (!IsValid_)
- return false;
+ return false;
return gnutls_record_recv(Session_, (char*)buffer, buflen);
-}
-
-#endif
-
-/************************************************************/
+}
+
+#endif
+
+/************************************************************/
socketAbstractHandler* socketHandlerFactory::chooseHandler(const THttpURL& url) {
if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTP)
- return new socketRegularHandler;
-
+ return new socketRegularHandler;
+
#ifdef USE_GNUTLS
if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTPS)
- return new socketSecureHandler;
+ return new socketSecureHandler;
#endif
-
+
return nullptr;
-}
-
-/************************************************************/
-socketHandlerFactory socketHandlerFactory::sInstance;
-/************************************************************/
+}
+
+/************************************************************/
+socketHandlerFactory socketHandlerFactory::sInstance;
+/************************************************************/
diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h
index 1c5b94a678..7fc251afd2 100644
--- a/library/cpp/http/fetch/httpfetcher.h
+++ b/library/cpp/http/fetch/httpfetcher.h
@@ -1,22 +1,22 @@
#pragma once
-
-#ifdef _MSC_VER
+
+#ifdef _MSC_VER
#include <io.h>
-#endif
-
+#endif
+
#include <library/cpp/http/misc/httpdate.h>
-
-#include "httpagent.h"
-#include "httpparser.h"
-
-struct TFakeBackup {
+
+#include "httpagent.h"
+#include "httpparser.h"
+
+struct TFakeBackup {
int Write(void* /*buf*/, size_t /*size*/) {
return 0;
}
-};
-
-template <size_t bufsize = 5000>
-struct TFakeAlloc {
+};
+
+template <size_t bufsize = 5000>
+struct TFakeAlloc {
void Shrink(void* /*buf*/, size_t /*size*/) {
}
void* Grab(size_t /*min*/, size_t* real) {
@@ -24,17 +24,17 @@ struct TFakeAlloc {
return buf;
}
char buf[bufsize];
-};
-
+};
+
template <typename TAlloc = TFakeAlloc<>,
typename TCheck = TFakeCheck<>,
- typename TWriter = TFakeBackup,
+ typename TWriter = TFakeBackup,
typename TAgent = THttpAgent<>>
class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent {
-public:
- static const size_t TCP_MIN = 1500;
- static int TerminateNow;
-
+public:
+ static const size_t TCP_MIN = 1500;
+ static int TerminateNow;
+
THttpFetcher()
: THttpParser<TCheck>()
, TAlloc()
@@ -47,54 +47,54 @@ public:
}
int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) {
- int ret = 0;
- int fetcherr = 0;
-
- THttpParser<TCheck>::Init(header, head_request);
+ int ret = 0;
+ int fetcherr = 0;
+
+ THttpParser<TCheck>::Init(header, head_request);
const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme());
size_t schemelen = strlen(scheme);
- if (*path == '/') {
+ if (*path == '/') {
header->base = TStringBuf(scheme, schemelen);
header->base += TStringBuf("://", 3);
header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg);
header->base += path;
- } else {
+ } else {
if (strlen(path) >= FETCHER_URL_MAX) {
header->error = HTTP_URL_TOO_LARGE;
return 0;
}
header->base = path;
- }
-
- if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) {
- header->error = (i16)ret;
- return 0;
- }
-
- bool inheader = 1;
+ }
+
+ if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) {
+ header->error = (i16)ret;
+ return 0;
+ }
+
+ bool inheader = 1;
void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr;
- ssize_t got;
- size_t buffree = 0, bufsize = 0, buflen = 0;
- size_t maxsize = TCheck::GetMaxHeaderSize();
- do {
- if (buffree < TCP_MIN) {
- if (buf) {
- TAlloc::Shrink(buf, buflen - buffree);
- if (TWriter::Write(buf, buflen - buffree) < 0) {
+ ssize_t got;
+ size_t buffree = 0, bufsize = 0, buflen = 0;
+ size_t maxsize = TCheck::GetMaxHeaderSize();
+ do {
+ if (buffree < TCP_MIN) {
+ if (buf) {
+ TAlloc::Shrink(buf, buflen - buffree);
+ if (TWriter::Write(buf, buflen - buffree) < 0) {
buf = nullptr;
- ret = EIO;
- break;
- }
- }
- if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) {
- ret = ENOMEM;
- break;
- }
- bufptr = buf;
- buffree = buflen;
- }
- if ((got = TAgent::read(bufptr, buffree)) < 0) {
- fetcherr = errno;
+ ret = EIO;
+ break;
+ }
+ }
+ if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) {
+ ret = ENOMEM;
+ break;
+ }
+ bufptr = buf;
+ buffree = buflen;
+ }
+ if ((got = TAgent::read(bufptr, buffree)) < 0) {
+ fetcherr = errno;
if (errno == EINTR)
header->error = HTTP_INTERRUPTED;
else if (errno == ETIMEDOUT)
@@ -102,43 +102,43 @@ public:
else
header->error = HTTP_CONNECTION_LOST;
- break;
- }
-
- parsebuf = bufptr;
- bufptr = (char*)bufptr + got;
- bufsize += got;
- buffree -= got;
-
- THttpParser<TCheck>::Parse(parsebuf, got);
-
- if (header->error)
+ break;
+ }
+
+ parsebuf = bufptr;
+ bufptr = (char*)bufptr + got;
+ bufsize += got;
+ buffree -= got;
+
+ THttpParser<TCheck>::Parse(parsebuf, got);
+
+ if (header->error)
break; //if ANY error ocurred we will stop download that file or will have unprognosed stream position until MAX size reached
-
- if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) {
- inheader = 0;
- if (TCheck::Check(header))
- break;
- if (header->header_size > (long)maxsize) {
- header->error = HTTP_HEADER_TOO_LARGE;
- break;
- }
+
+ if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) {
+ inheader = 0;
+ if (TCheck::Check(header))
+ break;
+ if (header->header_size > (long)maxsize) {
+ header->error = HTTP_HEADER_TOO_LARGE;
+ break;
+ }
}
if (!inheader) {
- maxsize = TCheck::GetMaxBodySize(header);
- }
- if (header->http_status >= HTTP_EXTENDED)
- break;
- if (bufsize > maxsize) {
- header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE;
- break;
- }
- if (TerminateNow) {
- header->error = HTTP_INTERRUPTED;
- break;
- }
- } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof);
-
+ maxsize = TCheck::GetMaxBodySize(header);
+ }
+ if (header->http_status >= HTTP_EXTENDED)
+ break;
+ if (bufsize > maxsize) {
+ header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE;
+ break;
+ }
+ if (TerminateNow) {
+ header->error = HTTP_INTERRUPTED;
+ break;
+ }
+ } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof);
+
i64 Adjustment = 0;
if (!header->error) {
if (header->transfer_chunked) {
@@ -150,22 +150,22 @@ public:
Adjustment = 0;
}
- if (buf) {
+ if (buf) {
TAlloc::Shrink(buf, buflen - buffree + Adjustment);
- if (TWriter::Write(buf, buflen - buffree) < 0)
- ret = EIO;
- }
- TCheck::CheckEndDoc(header);
- if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) {
- TAgent::Disconnect();
- if (!fetcherr)
- fetcherr = errno;
- }
- errno = fetcherr;
- return ret;
- }
-};
-
-template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent>
-int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0;
+ if (TWriter::Write(buf, buflen - buffree) < 0)
+ ret = EIO;
+ }
+ TCheck::CheckEndDoc(header);
+ if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) {
+ TAgent::Disconnect();
+ if (!fetcherr)
+ fetcherr = errno;
+ }
+ errno = fetcherr;
+ return ret;
+ }
+};
+
+template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent>
+int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0;
diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h
index 62a27b6561..c4abdcd0d2 100644
--- a/library/cpp/http/fetch/httpfsm.h
+++ b/library/cpp/http/fetch/httpfsm.h
@@ -1,13 +1,13 @@
#pragma once
-
+
#include "httpheader.h"
-#include <util/system/maxlen.h>
+#include <util/system/maxlen.h>
#include <util/datetime/parser.h>
-
+
#include <time.h>
-struct THttpHeaderParser {
+struct THttpHeaderParser {
static constexpr int ErrFirstlineTypeMismatch = -3;
static constexpr int ErrHeader = -2;
static constexpr int Err = -1;
@@ -16,34 +16,34 @@ struct THttpHeaderParser {
static constexpr int Accepted = 2;
int Execute(const void* inBuf, size_t len) {
- return execute((unsigned char*)inBuf, (int)len);
- }
-
+ return execute((unsigned char*)inBuf, (int)len);
+ }
+
int Execute(TStringBuf str) {
return Execute(str.data(), str.size());
}
int Init(THttpHeader* h) {
int ret = Init((THttpBaseHeader*)(h));
- hd = h;
- hd->Init();
+ hd = h;
+ hd->Init();
hreflangpos = hd->hreflangs;
hreflangspace = HREFLANG_MAX;
return ret;
- }
-
+ }
+
int Init(THttpAuthHeader* h) {
- int ret = Init((THttpHeader*)(h));
- auth_hd = h;
- return ret;
- }
+ int ret = Init((THttpHeader*)(h));
+ auth_hd = h;
+ return ret;
+ }
int Init(THttpRequestHeader* h) {
int ret = Init((THttpBaseHeader*)(h));
request_hd = h;
request_hd->Init();
return ret;
}
-
+
THttpHeader* hd;
long I;
int Dc;
@@ -51,7 +51,7 @@ struct THttpHeaderParser {
char buf[FETCHER_URL_MAX];
size_t buflen;
char* lastchar;
-
+
const unsigned char* langstart;
size_t langlen;
@@ -62,10 +62,10 @@ struct THttpHeaderParser {
THttpAuthHeader* auth_hd;
THttpRequestHeader* request_hd;
-
-private:
+
+private:
THttpBaseHeader* base_hd;
- int cs;
+ int cs;
private:
int Init(THttpBaseHeader* header) {
@@ -78,27 +78,27 @@ private:
}
int execute(unsigned char* inBuf, int len);
- void init();
-};
-
-struct THttpChunkParser {
+ void init();
+};
+
+struct THttpChunkParser {
int Execute(const void* inBuf, int len) {
- return execute((unsigned char*)inBuf, len);
- }
-
- int Init() {
- init();
- return 0;
- }
-
+ return execute((unsigned char*)inBuf, len);
+ }
+
+ int Init() {
+ init();
+ return 0;
+ }
+
int chunk_length;
char* lastchar;
long I;
int Dc;
i64 cnt64;
-
-private:
- int cs;
+
+private:
+ int cs;
int execute(unsigned char* inBuf, int len);
- void init();
-};
+ void init();
+};
diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6
index 83557b144e..eab0328b18 100644
--- a/library/cpp/http/fetch/httpfsm.rl6
+++ b/library/cpp/http/fetch/httpfsm.rl6
@@ -1,70 +1,70 @@
-#include <stdio.h>
-#include <time.h>
-
+#include <stdio.h>
+#include <time.h>
+
#include <library/cpp/charset/doccodes.h>
#include <library/cpp/charset/codepage.h>
#include <library/cpp/http/misc/httpcodes.h>
#include <util/datetime/base.h>
#include <util/generic/ylimits.h>
#include <algorithm> // max
-
+
#include <library/cpp/http/fetch/httpheader.h>
#include <library/cpp/http/fetch/httpfsm.h>
-
+
#ifdef _MSC_VER
#pragma warning(disable: 4702) // unreachable code
#endif
#define c(i) I = i;
#define m(i) I = std::max(I, (long)i);
-
-static inline int X(unsigned char c) {
- return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0'));
-}
-
-template <typename x>
-static inline void guard(x &val) {
- val = (val >= -1) ? -4 - val : -2; // f(-2) = -2
-}
-
-template <typename x>
-static inline void setguarded(x &val, long cnt) {
- val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2;
-}
-
-////////////////////////////////////////////////////////////////////
-/// HTTP PARSER
-////////////////////////////////////////////////////////////////////
-
-%%{
-machine http_header_parser;
-
+
+static inline int X(unsigned char c) {
+ return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0'));
+}
+
+template <typename x>
+static inline void guard(x &val) {
+ val = (val >= -1) ? -4 - val : -2; // f(-2) = -2
+}
+
+template <typename x>
+static inline void setguarded(x &val, long cnt) {
+ val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2;
+}
+
+////////////////////////////////////////////////////////////////////
+/// HTTP PARSER
+////////////////////////////////////////////////////////////////////
+
+%%{
+machine http_header_parser;
+
include HttpDateTimeParser "../../../../util/datetime/parser.rl6";
-alphtype unsigned char;
-
-################# 2.2 Basic Rules #################
-eol = '\r'? '\n';
-ws = [ \t];
-lw = '\r'? '\n'? ws;
+alphtype unsigned char;
+
+################# 2.2 Basic Rules #################
+eol = '\r'? '\n';
+ws = [ \t];
+lw = '\r'? '\n'? ws;
separator = [()<>@,;:\\"/\[\]?={}];
-token_char = [!-~] - separator; # http tokens chars
-url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
-text_char = ws | 33..126 | 128..255;
-any_text_char = any - [\r\n];
-
-lws = lw*;
-eoh = lws eol;
-token = token_char+;
-ex_token = (token_char | ws)* token_char;
-text = (text_char | lw)*;
-any_text = (any_text_char | lw)*;
-def = lws ':' lws;
-
+token_char = [!-~] - separator; # http tokens chars
+url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
+text_char = ws | 33..126 | 128..255;
+any_text_char = any - [\r\n];
+
+lws = lw*;
+eoh = lws eol;
+token = token_char+;
+ex_token = (token_char | ws)* token_char;
+text = (text_char | lw)*;
+any_text = (any_text_char | lw)*;
+def = lws ':' lws;
+
action clear_buf { buflen = 0; }
action update_buf { if (buflen < sizeof(buf)) buf[buflen++] = fc; }
-
-###################################################
+
+###################################################
############ response status line #################
action set_minor { base_hd->http_minor = I; }
action set_status {
@@ -75,14 +75,14 @@ action set_status {
return -3;
}
}
-
+
status_code = int3;
http_major = int;
http_minor = int;
reason_phrase = ws+ text_char*;
http_version = "http/"i http_major '.' http_minor %set_minor;
response_status_line = http_version ws+ status_code reason_phrase? eol %set_status;
-
+
############ request status line #################
action set_request_uri {
if (request_hd && buflen < FETCHER_URL_MAX) {
@@ -116,44 +116,44 @@ request_uri = (token_char | separator)+ >clear_buf $update_buf
%set_request_uri;
request_status_line = http_method ws+ request_uri ws+ http_version eoh;
-################# connection ######################
+################# connection ######################
action beg_connection { guard(base_hd->connection_closed); I = -1; }
action set_connection { setguarded(base_hd->connection_closed, I); }
-
-c_token = "close"i %{m(1)}
- | "keep-alive"i %{m(0)};
-c_tokenlist = c_token (lws ',' lws c_token)?;
+
+c_token = "close"i %{m(1)}
+ | "keep-alive"i %{m(0)};
+c_tokenlist = c_token (lws ',' lws c_token)?;
connection = "connection"i def %beg_connection c_tokenlist eoh %set_connection;
-
-################# content-encoding ################
+
+################# content-encoding ################
action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; }
action set_content_encoding { base_hd->compression_method =
((base_hd->compression_method == HTTP_COMPRESSION_UNSET ||
base_hd->compression_method == I) ?
I : (int)HTTP_COMPRESSION_ERROR); }
-
-ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)}
- | "gzip"i %{c(HTTP_COMPRESSION_GZIP)}
- | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)}
- | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)}
- | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)}
- | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};
+
+ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)}
+ | "gzip"i %{c(HTTP_COMPRESSION_GZIP)}
+ | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)}
+ | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)}
+ | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)}
+ | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};
content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding;
-
-################# transfer-encoding ###############
+
+################# transfer-encoding ###############
action beg_encoding { guard(base_hd->transfer_chunked); }
action set_encoding { setguarded(base_hd->transfer_chunked, I); }
-
-e_tokenlist = "identity"i %{c(0)}
- | "chunked"i %{c(1)};
+
+e_tokenlist = "identity"i %{c(0)}
+ | "chunked"i %{c(1)};
transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding;
-
-################# content-length ##################
+
+################# content-length ##################
action beg_content_length { guard(base_hd->content_length); }
action set_content_length { setguarded(base_hd->content_length, I); }
-
+
content_length = "content-length"i def %beg_content_length int eoh %set_content_length;
-
+
################# content-range ###################
action beg_content_range_start { guard(base_hd->content_range_start); I = -1; }
action set_content_range_start { setguarded(base_hd->content_range_start, I); }
@@ -166,7 +166,7 @@ content_range = "content-range"i def "bytes"i sp %beg_content_range_start int
%beg_content_range_end int '/' %set_content_range_end
%beg_content_range_el int eoh %set_content_range_el;
-################# accept-ranges ###################
+################# accept-ranges ###################
action beg_accept_ranges {
if (hd) {
guard(hd->accept_ranges);
@@ -174,21 +174,21 @@ action beg_accept_ranges {
}
}
action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); }
-
-ar_tokenlist = "bytes"i %{c(1)}
- | "none"i %{c(0)};
+
+ar_tokenlist = "bytes"i %{c(1)}
+ | "none"i %{c(0)};
accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges;
-
-################# content-type ####################
+
+################# content-type ####################
action beg_mime { guard(base_hd->mime_type); }
action set_mime { setguarded(base_hd->mime_type, I); }
action set_charset {
if (buflen < FETCHER_URL_MAX) {
- buf[buflen++] = 0;
+ buf[buflen++] = 0;
base_hd->charset = EncodingHintByName((const char*)buf);
- }
-}
-
+ }
+}
+
mime_type = "text/plain"i %{c(MIME_TEXT)}
| "text/html"i %{c(MIME_HTML)}
| "application/pdf"i %{c(MIME_PDF)}
@@ -234,36 +234,36 @@ mime_type = "text/plain"i %{c(MIME_TEXT)}
charset_name = token_char+ >clear_buf $update_buf;
mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2
- | token ws* '=' ws* '"'? token '"'? @1
- | text $0;
-mime_parms = (lws ';' lws mime_param)*;
+ | token ws* '=' ws* '"'? token '"'? @1
+ | text $0;
+mime_parms = (lws ';' lws mime_param)*;
content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime;
-
-################# last modified ###################
+
+################# last modified ###################
action beg_modtime { guard(base_hd->http_time); }
action set_modtime {
setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1));
}
-
+
last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime;
-
-################# location ########################
+
+################# location ########################
action set_location {
while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) {
buflen --;
}
if (hd && buflen < FETCHER_URL_MAX) {
hd->location = TStringBuf(buf, buflen);
- }
-}
-
+ }
+}
+
action set_status_303{ if (hd) hd->http_status = 303; }
-
+
url = url_char+ >clear_buf $update_buf;
loc_url = any_text_char+ >clear_buf $update_buf;
location = "location"i def loc_url eoh %set_location;
refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location;
-
+
################# x-robots-tag ################
action set_x_robots {
if (hd && AcceptingXRobots) {
@@ -349,56 +349,56 @@ action set_squid_error {
squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error;
-################# auth ########################
+################# auth ########################
action init_auth {
- if (auth_hd)
- auth_hd->use_auth=true;
-}
-
+ if (auth_hd)
+ auth_hd->use_auth=true;
+}
+
action update_auth_buf
- { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; }
-
-quoted_str = /"/ (text_char - /"/)* /"/ >2;
+ { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; }
+
+quoted_str = /"/ (text_char - /"/)* /"/ >2;
auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2;
-
-# do not support auth-int, too heavy procedure
-
-qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; };
-
-qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 );
-
-auth_good_param = ( "nonce"i /=/ auth_quoted_str )
+
+# do not support auth-int, too heavy procedure
+
+qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; };
+
+qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 );
+
+auth_good_param = ( "nonce"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->nonce = strdup((const char*)buf);
- }}
- | ( "realm"i /=/ auth_quoted_str )
+ buf[buflen++] = 0;
+ auth_hd->nonce = strdup((const char*)buf);
+ }}
+ | ( "realm"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->realm = strdup((const char*)buf);
- }}
- | ( "opaque"i /=/ auth_quoted_str )
+ buf[buflen++] = 0;
+ auth_hd->realm = strdup((const char*)buf);
+ }}
+ | ( "opaque"i /=/ auth_quoted_str )
%{if (auth_hd && buflen < FETCHER_URL_MAX-1) {
- buf[buflen++] = 0;
- auth_hd->opaque = strdup((const char*)buf);
- }}
- | "stale"i /=/ "true"i
- %{if (auth_hd) auth_hd->stale = true; }
- | "algorithm"i /=/ "md5"i /-/ "sess"i
- %{if (auth_hd) auth_hd->algorithm = 1; }
- | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/);
-
-auth_param = auth_good_param @1 |
- ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )
- /=/ (token | quoted_str ) ) $0;
-
-auth_params = auth_param ( ws* /,/ ws* auth_param )*;
-
+ buf[buflen++] = 0;
+ auth_hd->opaque = strdup((const char*)buf);
+ }}
+ | "stale"i /=/ "true"i
+ %{if (auth_hd) auth_hd->stale = true; }
+ | "algorithm"i /=/ "md5"i /-/ "sess"i
+ %{if (auth_hd) auth_hd->algorithm = 1; }
+ | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/);
+
+auth_param = auth_good_param @1 |
+ ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )
+ /=/ (token | quoted_str ) ) $0;
+
+auth_params = auth_param ( ws* /,/ ws* auth_param )*;
+
digest_challenge = ("digest"i %init_auth ws+ auth_params) |
- ((token-"digest"i) text);
-
-auth = "www-authenticate"i def digest_challenge eoh;
-
+ ((token-"digest"i) text);
+
+auth = "www-authenticate"i def digest_challenge eoh;
+
###################### host #######################
action set_host {
if (request_hd && buflen < HOST_MAX) {
@@ -562,8 +562,8 @@ action set_request_priority {
request_priority = "x-yandex-request-priority"i def int eoh
%set_request_priority;
-################# message header ##################
-other_header = ( ex_token - "www-authenticate"i ) def any_text eoh;
+################# message header ##################
+other_header = ( ex_token - "www-authenticate"i ) def any_text eoh;
message_header = other_header $0
| connection @1
| content_encoding @1
@@ -595,90 +595,90 @@ request_header = message_header $0
| request_cache_control @1
| response_timeout @1
| request_priority @1;
-
-################# main ############################
+
+################# main ############################
action accepted { lastchar = (char*)fpc; return 2; }
-
+
main := ((response_status_line ('\r'? response_header)*)
| (request_status_line ('\r' ? request_header)*))
eol @accepted;
-
-}%%
-
-%% write data;
-
-int THttpHeaderParser::execute(unsigned char *inBuf, int len) {
- const unsigned char *p = inBuf;
- const unsigned char *pe = p + len;
- %% write exec;
- if (cs == http_header_parser_error)
- return -1;
- else if (cs == http_header_parser_first_final)
- return 0;
- else
- return 1;
-}
-
-void THttpHeaderParser::init() {
- %% write init;
-}
-
-%%{
-machine http_chunk_parser;
-
-alphtype unsigned char;
-
+
+}%%
+
+%% write data;
+
+int THttpHeaderParser::execute(unsigned char *inBuf, int len) {
+ const unsigned char *p = inBuf;
+ const unsigned char *pe = p + len;
+ %% write exec;
+ if (cs == http_header_parser_error)
+ return -1;
+ else if (cs == http_header_parser_first_final)
+ return 0;
+ else
+ return 1;
+}
+
+void THttpHeaderParser::init() {
+ %% write init;
+}
+
+%%{
+machine http_chunk_parser;
+
+alphtype unsigned char;
+
action clear_hex { cnt64 = 0; }
action update_hex { cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; }
action set_chunk { chunk_length = static_cast<int>(cnt64); }
action accepted { lastchar = (char*)fpc; return 2; }
-
-eol = '\r'? '\n';
-ws = [ \t];
-sp = ' ';
-lw = '\r'? '\n'? ws;
-separator = [()<>@,;:\\"/\[\]?={}];
-token_char = [!-~] - separator; # http tokens chars
-url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
-text_char = ws | 33..127 | 160..255;
-
-lws = lw*;
-eoh = lws eol;
-token = token_char+;
-text = (text_char | lw)*;
-def = lws ':' lws;
-
+
+eol = '\r'? '\n';
+ws = [ \t];
+sp = ' ';
+lw = '\r'? '\n'? ws;
+separator = [()<>@,;:\\"/\[\]?={}];
+token_char = [!-~] - separator; # http tokens chars
+url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars
+text_char = ws | 33..127 | 160..255;
+
+lws = lw*;
+eoh = lws eol;
+token = token_char+;
+text = (text_char | lw)*;
+def = lws ':' lws;
+
hex = (xdigit+) >clear_hex $update_hex;
-quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"';
-
-chunk_ext_val = token | quoted_string;
-chunk_ext_name = token;
-chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;
-
-entity_header = token def text eoh;
-trailer = entity_header*;
-
+quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"';
+
+chunk_ext_val = token | quoted_string;
+chunk_ext_name = token;
+chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;
+
+entity_header = token def text eoh;
+trailer = entity_header*;
+
chunk = (hex - '0'+) chunk_extension? %set_chunk;
-last_chunk = '0'+ chunk_extension? eol trailer;
+last_chunk = '0'+ chunk_extension? eol trailer;
main := eol (chunk $0 | last_chunk @1) eol @accepted;
-
-}%%
-
-%% write data;
-
-int THttpChunkParser::execute(unsigned char *inBuf, int len) {
- const unsigned char *p = inBuf;
- const unsigned char *pe = p + len;
- %% write exec;
- if (cs == http_chunk_parser_error)
- return -1;
- else if (cs == http_chunk_parser_first_final)
- return 0;
- else
- return 1;
-}
-
-void THttpChunkParser::init() {
- chunk_length = 0;
- %% write init;
-}
+
+}%%
+
+%% write data;
+
+int THttpChunkParser::execute(unsigned char *inBuf, int len) {
+ const unsigned char *p = inBuf;
+ const unsigned char *pe = p + len;
+ %% write exec;
+ if (cs == http_chunk_parser_error)
+ return -1;
+ else if (cs == http_chunk_parser_first_final)
+ return 0;
+ else
+ return 1;
+}
+
+void THttpChunkParser::init() {
+ chunk_length = 0;
+ %% write init;
+}
diff --git a/library/cpp/http/fetch/httpheader.h b/library/cpp/http/fetch/httpheader.h
index 20f8e0956b..b2810bbd41 100644
--- a/library/cpp/http/fetch/httpheader.h
+++ b/library/cpp/http/fetch/httpheader.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "exthttpcodes.h"
#include <library/cpp/mime/types/mime.h>
@@ -11,11 +11,11 @@
#include <util/system/maxlen.h>
#include <ctime>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
#include <algorithm>
-
+
// This is ugly solution but here a lot of work to do it the right way.
#define FETCHER_URL_MAX 8192
@@ -29,16 +29,16 @@ extern const i32 DEFAULT_RESPONSE_TIMEOUT; /// == -1
#define MAX_LANGREGION_LEN 4
#define MAXWORD_LEN 55
-enum HTTP_COMPRESSION {
+enum HTTP_COMPRESSION {
HTTP_COMPRESSION_UNSET = 0,
HTTP_COMPRESSION_ERROR = 1,
- HTTP_COMPRESSION_IDENTITY = 2,
+ HTTP_COMPRESSION_IDENTITY = 2,
HTTP_COMPRESSION_GZIP = 3,
HTTP_COMPRESSION_DEFLATE = 4,
- HTTP_COMPRESSION_COMPRESS = 5,
+ HTTP_COMPRESSION_COMPRESS = 5,
HTTP_COMPRESSION_MAX = 6
-};
-
+};
+
enum HTTP_METHOD {
HTTP_METHOD_UNDEFINED = -1,
HTTP_METHOD_OPTIONS,
@@ -78,25 +78,25 @@ public:
TString base;
public:
- void Init() {
+ void Init() {
error = 0;
- header_size = 0;
- entity_size = 0;
- content_length = -1;
- http_time = -1;
- http_minor = -1;
- mime_type = -1;
- charset = -1;
- compression_method = HTTP_COMPRESSION_UNSET;
- transfer_chunked = -1;
+ header_size = 0;
+ entity_size = 0;
+ content_length = -1;
+ http_time = -1;
+ http_minor = -1;
+ mime_type = -1;
+ charset = -1;
+ compression_method = HTTP_COMPRESSION_UNSET;
+ transfer_chunked = -1;
connection_closed = HTTP_CONNECTION_UNDEFINED;
content_range_start = -1;
content_range_end = -1;
content_range_entity_length = -1;
base.clear();
- }
-
- void Print() const {
+ }
+
+ void Print() const {
printf("content_length: %" PRIi64 "\n", content_length);
printf("http_time: %" PRIi64 "\n", http_time);
printf("http_minor: %" PRIi8 "\n", http_minor);
@@ -110,22 +110,22 @@ public:
printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length);
printf("base: \"%s\"\n", base.c_str());
printf("error: %" PRIi16 "\n", error);
- }
-
- int SetBase(const char* path,
+ }
+
+ int SetBase(const char* path,
const char* hostNamePtr = nullptr,
int hostNameLength = 0) {
if (*path == '/') {
base = "http://";
base += TStringBuf(hostNamePtr, hostNameLength);
base += path;
- } else {
+ } else {
base = path;
- }
- return error;
- }
-};
-
+ }
+ return error;
+ }
+};
+
enum { HREFLANG_MAX = FETCHER_URL_MAX * 2 };
/// Class represents Http Response Header.
struct THttpHeader: public THttpBaseHeader {
@@ -165,7 +165,7 @@ public:
};
struct THttpRequestHeader: public THttpBaseHeader {
-public:
+public:
TString request_uri;
char host[HOST_MAX];
char from[MAXWORD_LEN];
@@ -184,7 +184,7 @@ public:
THttpRequestHeader() {
Init();
}
-
+
void Init() {
request_uri.clear();
host[0] = 0;
@@ -201,7 +201,7 @@ public:
if_modified_since = DEFAULT_IF_MODIFIED_SINCE;
THttpBaseHeader::Init();
}
-
+
void Print() const {
THttpBaseHeader::Print();
printf("request_uri: \"%s\"\n", request_uri.c_str());
@@ -213,7 +213,7 @@ public:
printf("max_age: %" PRIi32 "\n", max_age);
printf("if_modified_since: %" PRIi64 "\n", if_modified_since);
}
-
+
/// It doesn't care about errors in request or headers, where
/// request_uri equals to '*'.
/// This returns copy of the string, which you have to delete.
@@ -225,20 +225,20 @@ public:
url = HTTP_PREFIX;
url += host;
url += request_uri;
- }
+ }
return url;
}
-
+
char* GetUrl(char* buffer, size_t size) {
if (host[0] == 0 || !strcmp(host, "")) {
strlcpy(buffer, request_uri.c_str(), size);
} else {
snprintf(buffer, size, "http://%s%s", host, request_uri.c_str());
- }
+ }
return buffer;
}
};
-
+
class THttpAuthHeader: public THttpHeader {
public:
char* realm;
@@ -282,6 +282,6 @@ public:
printf("stale: %d\n", stale);
printf("algorithm: %d\n", algorithm);
printf("qop_auth: %d\n", qop_auth);
- }
+ }
}
-};
+};
diff --git a/library/cpp/http/fetch/httpload.cpp b/library/cpp/http/fetch/httpload.cpp
index f944d7906a..82ea8900b5 100644
--- a/library/cpp/http/fetch/httpload.cpp
+++ b/library/cpp/http/fetch/httpload.cpp
@@ -1,7 +1,7 @@
-#include "httpload.h"
-
-/************************************************************/
-/************************************************************/
+#include "httpload.h"
+
+/************************************************************/
+/************************************************************/
httpAgentReader::httpAgentReader(httpSpecialAgent& agent,
const char* baseUrl,
bool assumeConnectionClosed,
@@ -13,109 +13,109 @@ httpAgentReader::httpAgentReader(httpSpecialAgent& agent,
, BufPtr_(Buffer_)
, BufSize_(bufSize)
, BufRest_(0)
-{
- HeadRequest = false;
+{
+ HeadRequest = false;
Header = &Header_;
- if (use_auth)
+ if (use_auth)
HeaderParser.Init(&Header_);
- else
- HeaderParser.Init(Header);
+ else
+ HeaderParser.Init(Header);
setAssumeConnectionClosed(assumeConnectionClosed ? 1 : 0);
Header_.SetBase(baseUrl);
-
+
if (Header_.error)
- State = hp_error;
- else
- State = hp_in_header;
-}
-
-/************************************************************/
+ State = hp_error;
+ else
+ State = hp_in_header;
+}
+
+/************************************************************/
httpAgentReader::~httpAgentReader() {
delete[] Buffer_;
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpAgentReader::readBuf() {
assert(BufRest_ == 0);
if (!BufPtr_) {
BufRest_ = -1;
- return;
- }
-
+ return;
+ }
+
BufRest_ = Agent_.read(Buffer_, BufSize_);
if (BufRest_ <= 0) {
BufRest_ = -1;
BufPtr_ = nullptr;
- } else {
+ } else {
BufPtr_ = Buffer_;
- //cout << "BUF: " << mBuffer << endl << endl;
- }
-}
-
-/************************************************************/
+ //cout << "BUF: " << mBuffer << endl << endl;
+ }
+}
+
+/************************************************************/
const THttpHeader* httpAgentReader::readHeader() {
while (State == hp_in_header) {
if (!step()) {
Header_.error = HTTP_CONNECTION_LOST;
return nullptr;
- }
+ }
ParseGeneric(BufPtr_, BufRest_);
- }
+ }
if (State == hp_eof || State == hp_error) {
BufPtr_ = nullptr;
BufRest_ = -1;
- }
+ }
if (State == hp_error || Header_.error)
return nullptr;
return &Header_;
-}
-
-/************************************************************/
+}
+
+/************************************************************/
long httpAgentReader::readPortion(void*& buf) {
assert(State != hp_in_header);
-
- long Chunk = 0;
+
+ long Chunk = 0;
do {
if (BufSize_ == 0 && !BufPtr_)
- return 0;
-
- if (!step())
- return 0;
-
+ return 0;
+
+ if (!step())
+ return 0;
+
Chunk = ParseGeneric(BufPtr_, BufRest_);
buf = BufPtr_;
-
+
if (State == hp_error && Header_.entity_size > Header_.content_length) {
Chunk -= (Header_.entity_size - Header_.content_length);
BufPtr_ = (char*)BufPtr_ + Chunk;
BufRest_ = 0;
State = hp_eof;
Header_.error = 0;
- break;
- }
-
+ break;
+ }
+
BufPtr_ = (char*)BufPtr_ + Chunk;
BufRest_ -= Chunk;
-
+
if (State == hp_eof || State == hp_error) {
BufRest_ = -1;
BufPtr_ = nullptr;
- }
- } while (!Chunk);
- return Chunk;
-}
-
-/************************************************************/
+ }
+ } while (!Chunk);
+ return Chunk;
+}
+
+/************************************************************/
bool httpAgentReader::skipTheRest() {
void* b;
- while (!eof())
- readPortion(b);
+ while (!eof())
+ readPortion(b);
return (State == hp_eof);
-}
-
-/************************************************************/
-/************************************************************/
+}
+
+/************************************************************/
+/************************************************************/
httpLoadAgent::httpLoadAgent(bool handleAuthorization,
socketHandlerFactory& factory)
: Factory_(factory)
@@ -126,82 +126,82 @@ httpLoadAgent::httpLoadAgent(bool handleAuthorization,
, Headers_()
, ErrCode_(0)
, RealHost_(nullptr)
-{
-}
-
-/************************************************************/
+{
+}
+
+/************************************************************/
httpLoadAgent::~httpLoadAgent() {
delete Reader_;
free(RealHost_);
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpLoadAgent::clearReader() {
if (Reader_) {
- bool opened = false;
+ bool opened = false;
if (PersistentConn_) {
const THttpHeader* H = Reader_->readHeader();
if (H && !H->connection_closed) {
Reader_->skipTheRest();
- opened = true;
- }
- }
- if (!opened)
- Disconnect();
+ opened = true;
+ }
+ }
+ if (!opened)
+ Disconnect();
delete Reader_;
Reader_ = nullptr;
- }
+ }
ErrCode_ = 0;
-}
-/************************************************************/
+}
+/************************************************************/
void httpLoadAgent::setRealHost(const char* hostname) {
free(RealHost_);
- if (hostname)
+ if (hostname)
RealHost_ = strdup(hostname);
- else
+ else
RealHost_ = nullptr;
ErrCode_ = 0;
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpLoadAgent::setIMS(const char* ifModifiedSince) {
- char ims_buf[100];
- snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n",
- ifModifiedSince);
+ char ims_buf[100];
+ snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n",
+ ifModifiedSince);
Headers_.push_back(ims_buf);
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpLoadAgent::addHeaderInstruction(const char* instr) {
Headers_.push_back(instr);
-}
-
-/************************************************************/
+}
+
+/************************************************************/
void httpLoadAgent::dropHeaderInstructions() {
Headers_.clear();
-}
-
-/************************************************************/
+}
+
+/************************************************************/
bool httpLoadAgent::startRequest(const THttpURL& url,
bool persistent,
const TAddrList& addrs)
-{
- clearReader();
+{
+ clearReader();
ErrCode_ = 0;
-
+
URL_.Clear();
URL_ = url;
PersistentConn_ = persistent;
if (!URL_.IsValidAbs())
- return false;
+ return false;
if (!HandleAuthorization_ && !URL_.IsNull(THttpURL::FlagAuth))
- return false;
-
+ return false;
+
return doSetHost(addrs) && doStartRequest();
-}
-
-/************************************************************/
+}
+
+/************************************************************/
bool httpLoadAgent::startRequest(const char* url,
const char* url_to_merge,
bool persistent,
@@ -226,148 +226,148 @@ bool httpLoadAgent::startRequest(const char* url,
const char* url_to_merge,
bool persistent,
ui32 ip) {
- clearReader();
-
+ clearReader();
+
URL_.Clear();
PersistentConn_ = persistent;
-
- long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;
+
+ long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;
if (HandleAuthorization_)
- flags |= THttpURL::FeatureAuthSupported;
-
+ flags |= THttpURL::FeatureAuthSupported;
+
if (URL_.Parse(url, flags, url_to_merge) || !URL_.IsValidGlobal())
- return false;
-
+ return false;
+
return doSetHost(TAddrList::MakeV4Addr(ip, URL_.GetPort())) && doStartRequest();
-}
-
-/************************************************************/
+}
+
+/************************************************************/
bool httpLoadAgent::doSetHost(const TAddrList& addrs) {
socketAbstractHandler* h = Factory_.chooseHandler(URL_);
- if (!h)
- return false;
- Socket.setHandler(h);
-
+ if (!h)
+ return false;
+ Socket.setHandler(h);
+
if (addrs.size()) {
ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost),
URL_.GetPort(), addrs);
- } else {
+ } else {
ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost),
URL_.GetPort());
- }
+ }
if (ErrCode_)
- return false;
-
+ return false;
+
if (RealHost_) {
- free(Hostheader);
+ free(Hostheader);
Hostheader = (char*)malloc(strlen(RealHost_) + 20);
sprintf(Hostheader, "Host: %s\r\n", RealHost_);
- }
-
+ }
+
if (!URL_.IsNull(THttpURL::FlagAuth)) {
if (!HandleAuthorization_) {
ErrCode_ = HTTP_UNAUTHORIZED;
- return false;
- }
-
+ return false;
+ }
+
Digest_.setAuthorization(URL_.Get(THttpURL::FieldUsername),
URL_.Get(THttpURL::FieldPassword));
- }
-
- return true;
-}
-
-/************************************************************/
+ }
+
+ return true;
+}
+
+/************************************************************/
bool httpLoadAgent::setHost(const char* host_url,
const TAddrList& addrs) {
- clearReader();
-
+ clearReader();
+
URL_.Clear();
PersistentConn_ = true;
-
- long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;
+
+ long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;
if (HandleAuthorization_)
- flags |= THttpURL::FeatureAuthSupported;
-
+ flags |= THttpURL::FeatureAuthSupported;
+
if (URL_.Parse(host_url, flags) || !URL_.IsValidGlobal())
- return false;
-
+ return false;
+
return doSetHost(addrs);
-}
-
-/************************************************************/
+}
+
+/************************************************************/
bool httpLoadAgent::startOneRequest(const char* local_url) {
- clearReader();
-
- THttpURL lURL;
+ clearReader();
+
+ THttpURL lURL;
if (lURL.Parse(local_url, THttpURL::FeaturesNormalizeSet) || lURL.IsValidGlobal())
- return false;
-
+ return false;
+
URL_.SetInMemory(THttpURL::FieldPath, lURL.Get(THttpURL::FieldPath));
URL_.SetInMemory(THttpURL::FieldQuery, lURL.Get(THttpURL::FieldQuery));
URL_.Rewrite();
-
- return doStartRequest();
-}
-
-/************************************************************/
+
+ return doStartRequest();
+}
+
+/************************************************************/
bool httpLoadAgent::doStartRequest() {
TString urlStr = URL_.PrintS(THttpURL::FlagPath | THttpURL::FlagQuery);
- if (!urlStr)
- urlStr = "/";
-
+ if (!urlStr)
+ urlStr = "/";
+
for (int step = 0; step < 10; step++) {
const char* digestHeader = Digest_.getHeaderInstruction();
-
+
unsigned i = (digestHeader) ? 2 : 1;
- const char** headers =
+ const char** headers =
(const char**)(alloca((i + Headers_.size()) * sizeof(char*)));
-
+
for (i = 0; i < Headers_.size(); i++)
headers[i] = Headers_[i].c_str();
- if (digestHeader)
- headers[i++] = digestHeader;
+ if (digestHeader)
+ headers[i++] = digestHeader;
headers[i] = nullptr;
-
+
ErrCode_ = RequestGet(urlStr.c_str(), headers, PersistentConn_);
-
+
if (ErrCode_) {
- Disconnect();
- return false;
- }
-
+ Disconnect();
+ return false;
+ }
+
TString urlBaseStr = URL_.PrintS(THttpURL::FlagNoFrag);
-
- clearReader();
+
+ clearReader();
Reader_ = new httpAgentReader(*this, urlBaseStr.c_str(),
!PersistentConn_, !Digest_.empty());
-
+
if (Reader_->readHeader()) {
- //mReader->getHeader()->Print();
+ //mReader->getHeader()->Print();
if (getHeader()->http_status == HTTP_UNAUTHORIZED &&
step < 1 &&
Digest_.processHeader(getAuthHeader(),
urlStr.c_str(),
"GET")) {
- //mReader->skipTheRest();
+ //mReader->skipTheRest();
delete Reader_;
Reader_ = nullptr;
ErrCode_ = 0;
- Disconnect();
- continue;
- }
-
- return true;
- }
- Disconnect();
- clearReader();
-
- return false;
- }
-
+ Disconnect();
+ continue;
+ }
+
+ return true;
+ }
+ Disconnect();
+ clearReader();
+
+ return false;
+ }
+
ErrCode_ = HTTP_UNAUTHORIZED;
- return false;
-}
-
-/************************************************************/
-/************************************************************/
+ return false;
+}
+
+/************************************************************/
+/************************************************************/
diff --git a/library/cpp/http/fetch/httpload.h b/library/cpp/http/fetch/httpload.h
index 1441dd27b5..e22e4b809e 100644
--- a/library/cpp/http/fetch/httpload.h
+++ b/library/cpp/http/fetch/httpload.h
@@ -1,226 +1,226 @@
#pragma once
-
+
#include "httpagent.h"
#include "httpparser.h"
#include "http_digest.h"
-#include <util/system/compat.h>
-#include <util/string/vector.h>
+#include <util/system/compat.h>
+#include <util/string/vector.h>
#include <util/network/ip.h>
#include <library/cpp/uri/http_url.h>
#include <library/cpp/http/misc/httpcodes.h>
-
-/********************************************************/
-// Section 1: socket handlers
-/********************************************************/
-// The following classes allows to adopt template scheme
-// THttpAgent for work with socket by flexible
-// object-style scheme.
-
-/********************************************************/
-// This class is used as a base one for flexible
-// socket handling
+
+/********************************************************/
+// Section 1: socket handlers
+/********************************************************/
+// The following classes allows to adopt template scheme
+// THttpAgent for work with socket by flexible
+// object-style scheme.
+
+/********************************************************/
+// This class is used as a base one for flexible
+// socket handling
class socketAbstractHandler {
-public:
- virtual bool Good() = 0;
-
+public:
+ virtual bool Good() = 0;
+
virtual int Connect(const TAddrList& addrs, TDuration Timeout) = 0;
-
- virtual void Disconnect() = 0;
-
- virtual void shutdown() = 0;
-
- virtual bool send(const char* message, ssize_t messlen) = 0;
-
- virtual bool peek() = 0;
-
- virtual ssize_t read(void* buffer, ssize_t buflen) = 0;
-
+
+ virtual void Disconnect() = 0;
+
+ virtual void shutdown() = 0;
+
+ virtual bool send(const char* message, ssize_t messlen) = 0;
+
+ virtual bool peek() = 0;
+
+ virtual ssize_t read(void* buffer, ssize_t buflen) = 0;
+
virtual ~socketAbstractHandler() {
}
-
-protected:
+
+protected:
socketAbstractHandler() {
}
-};
-
-/********************************************************/
-// This class is used as a proxy between THttpAgent and
-// socketAbstractHandler
-// (it is used by template scheme,
-// so it does not have virtual methods)
+};
+
+/********************************************************/
+// This class is used as a proxy between THttpAgent and
+// socketAbstractHandler
+// (it is used by template scheme,
+// so it does not have virtual methods)
class TSocketHandlerPtr {
-protected:
+protected:
socketAbstractHandler* Handler_;
-
-public:
+
+public:
TSocketHandlerPtr()
: Handler_(nullptr)
{
}
-
+
virtual ~TSocketHandlerPtr() {
delete Handler_;
}
-
+
int Good() {
return (Handler_ && Handler_->Good());
}
-
+
int Connect(const TAddrList& addrs, TDuration Timeout) {
return (Handler_) ? Handler_->Connect(addrs, Timeout) : 1;
- }
-
+ }
+
void Disconnect() {
if (Handler_)
Handler_->Disconnect();
- }
-
+ }
+
void shutdown() {
if (Handler_)
Handler_->shutdown();
- }
-
+ }
+
bool send(const char* message, ssize_t messlen) {
return (Handler_) ? Handler_->send(message, messlen) : false;
- }
-
+ }
+
virtual bool peek() {
return (Handler_) ? Handler_->peek() : false;
- }
-
+ }
+
virtual ssize_t read(void* buffer, ssize_t buflen) {
return (Handler_) ? Handler_->read(buffer, buflen) : 0;
- }
-
+ }
+
void setHandler(socketAbstractHandler* handler) {
if (Handler_)
delete Handler_;
Handler_ = handler;
- }
-};
-
-/********************************************************/
-// Here is httpAgent that uses socketAbstractHandler class
-// ant its derivatives
+ }
+};
+
+/********************************************************/
+// Here is httpAgent that uses socketAbstractHandler class
+// ant its derivatives
using httpSpecialAgent = THttpAgent<TSocketHandlerPtr>;
-
-/********************************************************/
-// Regular handler is used as implementation of
-// socketAbstractHandler for work through HTTP protocol
+
+/********************************************************/
+// Regular handler is used as implementation of
+// socketAbstractHandler for work through HTTP protocol
class socketRegularHandler: public socketAbstractHandler {
protected:
TSimpleSocketHandler Socket_;
-
+
public:
socketRegularHandler()
: Socket_()
{
}
-
+
bool Good() override {
return Socket_.Good();
- }
-
+ }
+
int Connect(const TAddrList& addrs, TDuration Timeout) override {
return Socket_.Connect(addrs, Timeout);
- }
-
+ }
+
void Disconnect() override {
Socket_.Disconnect();
- }
-
+ }
+
void shutdown() override {
- //Do not block writing to socket
- //There are servers that works in a bad way with this
- //mSocket.shutdown();
- }
-
+ //Do not block writing to socket
+ //There are servers that works in a bad way with this
+ //mSocket.shutdown();
+ }
+
bool send(const char* message, ssize_t messlen) override {
return Socket_.send(message, messlen);
- }
-
+ }
+
bool peek() override {
return Socket_.peek();
- }
-
+ }
+
ssize_t read(void* buffer, ssize_t buflen) override {
return Socket_.read(buffer, buflen);
- }
-};
-
-/********************************************************/
-// The base factory that allows to choose an appropriate
-// socketAbstractHandler implementation by url schema
+ }
+};
+
+/********************************************************/
+// The base factory that allows to choose an appropriate
+// socketAbstractHandler implementation by url schema
class socketHandlerFactory {
public:
virtual ~socketHandlerFactory() {
}
-
- //returns mHandler_HTTP for correct HTTP-based url
+
+ //returns mHandler_HTTP for correct HTTP-based url
virtual socketAbstractHandler* chooseHandler(const THttpURL& url);
-
- static socketHandlerFactory sInstance;
-};
-
-/********************************************************/
-// Section 2: the configurates tool to parse an HTTP-response
-/********************************************************/
-
+
+ static socketHandlerFactory sInstance;
+};
+
+/********************************************************/
+// Section 2: the configurates tool to parse an HTTP-response
+/********************************************************/
+
class httpAgentReader: public THttpParserGeneric<1> {
protected:
THttpAuthHeader Header_;
httpSpecialAgent& Agent_;
-
+
char* Buffer_;
void* BufPtr_;
int BufSize_;
long BufRest_;
-
- void readBuf();
-
+
+ void readBuf();
+
bool step() {
if (BufRest_ == 0)
- readBuf();
- if (eof())
- return false;
- return true;
- }
-
+ readBuf();
+ if (eof())
+ return false;
+ return true;
+ }
+
public:
httpAgentReader(httpSpecialAgent& agent,
const char* baseUrl,
bool assumeConnectionClosed,
bool use_auth = false,
int bufSize = 0x1000);
-
- ~httpAgentReader();
-
+
+ ~httpAgentReader();
+
bool eof() {
return BufRest_ < 0;
- }
-
+ }
+
int error() {
return Header_.error;
- }
-
+ }
+
void setError(int errCode) {
Header_.error = errCode;
- }
-
+ }
+
const THttpAuthHeader* getAuthHeader() {
return &Header_;
- }
-
+ }
+
const THttpHeader* readHeader();
long readPortion(void*& buf);
bool skipTheRest();
-};
-
-/********************************************************/
-// Section 3: the main class
-/********************************************************/
+};
+
+/********************************************************/
+// Section 3: the main class
+/********************************************************/
class httpLoadAgent: public httpSpecialAgent {
protected:
socketHandlerFactory& Factory_;
@@ -232,76 +232,76 @@ protected:
int ErrCode_;
char* RealHost_;
httpDigestHandler Digest_;
-
- void clearReader();
+
+ void clearReader();
bool doSetHost(const TAddrList& addrs);
- bool doStartRequest();
-
+ bool doStartRequest();
+
public:
httpLoadAgent(bool handleAuthorization = false,
socketHandlerFactory& factory = socketHandlerFactory::sInstance);
- ~httpLoadAgent();
-
- void setRealHost(const char* host);
+ ~httpLoadAgent();
+
+ void setRealHost(const char* host);
void setIMS(const char* ifModifiedSince);
- void addHeaderInstruction(const char* instr);
- void dropHeaderInstructions();
-
- bool startRequest(const char* url,
+ void addHeaderInstruction(const char* instr);
+ void dropHeaderInstructions();
+
+ bool startRequest(const char* url,
const char* url_to_merge = nullptr,
bool persistent = false,
const TAddrList& addrs = TAddrList());
-
+
// deprecated v4-only
bool startRequest(const char* url,
const char* url_to_merge,
bool persistent,
ui32 ip);
- bool startRequest(const THttpURL& url,
+ bool startRequest(const THttpURL& url,
bool persistent = false,
const TAddrList& addrs = TAddrList());
-
+
bool setHost(const char* host_url,
const TAddrList& addrs = TAddrList());
-
- bool startOneRequest(const char* local_url);
-
+
+ bool startOneRequest(const char* local_url);
+
const THttpAuthHeader* getAuthHeader() {
if (Reader_ && Reader_->getAuthHeader()->use_auth)
return Reader_->getAuthHeader();
return nullptr;
- }
-
+ }
+
const THttpHeader* getHeader() {
if (Reader_)
return Reader_->getAuthHeader();
return nullptr;
- }
-
+ }
+
const THttpURL& getURL() {
return URL_;
- }
-
+ }
+
bool eof() {
if (Reader_)
return Reader_->eof();
- return true;
- }
-
+ return true;
+ }
+
int error() {
if (ErrCode_)
return ErrCode_;
if (Reader_)
return Reader_->error();
- return HTTP_BAD_URL;
- }
-
+ return HTTP_BAD_URL;
+ }
+
long readPortion(void*& buf) {
if (Reader_)
return Reader_->readPortion(buf);
- return -1;
- }
-};
-
-/********************************************************/
+ return -1;
+ }
+};
+
+/********************************************************/
diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h
index b666707038..769828e4ae 100644
--- a/library/cpp/http/fetch/httpparser.h
+++ b/library/cpp/http/fetch/httpparser.h
@@ -1,14 +1,14 @@
#pragma once
-
-#include "httpfsm.h"
-#include "httpheader.h"
-
+
+#include "httpfsm.h"
+#include "httpheader.h"
+
#include <library/cpp/mime/types/mime.h>
#include <util/system/yassert.h>
#include <library/cpp/http/misc/httpcodes.h>
template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20>
-struct TFakeCheck {
+struct TFakeCheck {
bool Check(THttpHeader* /*header*/) {
return false;
}
@@ -22,351 +22,351 @@ struct TFakeCheck {
size_t GetMaxBodySize(THttpHeader*) {
return bodymax;
}
-};
-
-class THttpParserBase {
-public:
- enum States {
- hp_error,
- hp_eof,
- hp_in_header,
- hp_read_alive,
- hp_read_closed,
- hp_begin_chunk_header,
- hp_chunk_header,
- hp_read_chunk
- };
-
+};
+
+class THttpParserBase {
+public:
+ enum States {
+ hp_error,
+ hp_eof,
+ hp_in_header,
+ hp_read_alive,
+ hp_read_closed,
+ hp_begin_chunk_header,
+ hp_chunk_header,
+ hp_read_chunk
+ };
+
States GetState() {
return State;
}
-
+
void setAssumeConnectionClosed(int value) {
- AssumeConnectionClosed = value;
- }
-
+ AssumeConnectionClosed = value;
+ }
+
THttpHeader* GetHttpHeader() const {
return Header;
}
-
-protected:
- int CheckHeaders() {
+
+protected:
+ int CheckHeaders() {
if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) {
- Header->content_length = 0;
- Header->transfer_chunked = 0;
- }
- if (Header->transfer_chunked < -1) {
- Header->error = HTTP_BAD_ENCODING;
- return 1;
- } else if (Header->transfer_chunked == -1) {
- Header->transfer_chunked = 0;
- }
- if (!Header->transfer_chunked && Header->content_length < -1) {
- Header->error = HTTP_BAD_CONTENT_LENGTH;
- return 1;
- }
+ Header->content_length = 0;
+ Header->transfer_chunked = 0;
+ }
+ if (Header->transfer_chunked < -1) {
+ Header->error = HTTP_BAD_ENCODING;
+ return 1;
+ } else if (Header->transfer_chunked == -1) {
+ Header->transfer_chunked = 0;
+ }
+ if (!Header->transfer_chunked && Header->content_length < -1) {
+ Header->error = HTTP_BAD_CONTENT_LENGTH;
+ return 1;
+ }
if (Header->http_status == HTTP_OK) {
- if (Header->compression_method != HTTP_COMPRESSION_UNSET &&
- Header->compression_method != HTTP_COMPRESSION_IDENTITY &&
- Header->compression_method != HTTP_COMPRESSION_GZIP &&
+ if (Header->compression_method != HTTP_COMPRESSION_UNSET &&
+ Header->compression_method != HTTP_COMPRESSION_IDENTITY &&
+ Header->compression_method != HTTP_COMPRESSION_GZIP &&
Header->compression_method != HTTP_COMPRESSION_DEFLATE)
{
- Header->error = HTTP_BAD_CONTENT_ENCODING;
- return 1;
- }
- }
- if (Header->connection_closed == -1)
- Header->connection_closed = (Header->http_minor == 0 ||
- AssumeConnectionClosed);
+ Header->error = HTTP_BAD_CONTENT_ENCODING;
+ return 1;
+ }
+ }
+ if (Header->connection_closed == -1)
+ Header->connection_closed = (Header->http_minor == 0 ||
+ AssumeConnectionClosed);
if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) {
- Header->error = HTTP_LENGTH_UNKNOWN;
- return 1;
- }
- if (Header->http_time < 0)
- Header->http_time = 0;
- if (Header->mime_type < 0)
- Header->mime_type = MIME_UNKNOWN;
- return 0;
- }
-
- THttpHeaderParser HeaderParser;
- THttpChunkParser ChunkParser;
- States State;
- long ChunkSize;
+ Header->error = HTTP_LENGTH_UNKNOWN;
+ return 1;
+ }
+ if (Header->http_time < 0)
+ Header->http_time = 0;
+ if (Header->mime_type < 0)
+ Header->mime_type = MIME_UNKNOWN;
+ return 0;
+ }
+
+ THttpHeaderParser HeaderParser;
+ THttpChunkParser ChunkParser;
+ States State;
+ long ChunkSize;
THttpHeader* Header;
- int AssumeConnectionClosed;
- bool HeadRequest;
-};
-
+ int AssumeConnectionClosed;
+ bool HeadRequest;
+};
+
template <int isReader, typename TCheck = TFakeCheck<>>
-class THttpParserGeneric: public THttpParserBase, public TCheck {
-protected:
+class THttpParserGeneric: public THttpParserBase, public TCheck {
+protected:
long ParseGeneric(void*& buf, long& size) {
- if (!size) {
- switch (State) {
- case hp_error:
- case hp_eof:
- break;
- case hp_read_closed:
- State = hp_eof;
- break;
- case hp_in_header:
- Header->error = HTTP_HEADER_EOF;
- State = hp_error;
- break;
- case hp_read_alive:
- case hp_read_chunk:
- if (HeadRequest)
- State = hp_eof;
- else {
- Header->error = HTTP_MESSAGE_EOF;
- State = hp_error;
- }
- break;
- case hp_begin_chunk_header:
- case hp_chunk_header:
- if (HeadRequest)
- State = hp_eof;
- else {
- Header->error = HTTP_CHUNK_EOF;
- State = hp_error;
- }
- break;
- }
- return 0;
- }
- while (size) {
- int ret;
-
- switch (State) {
- case hp_error:
- return 0;
-
- case hp_eof:
- return 0;
-
- case hp_in_header:
- if ((ret = HeaderParser.Execute(buf, size)) < 0) {
- Header->error = HTTP_BAD_HEADER_STRING;
- State = hp_error;
- return 0;
- } else if (ret == 2) {
- Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1);
- size -= long(HeaderParser.lastchar - (char*)buf + 1);
- buf = HeaderParser.lastchar + 1;
- State = CheckHeaders() ? hp_error
+ if (!size) {
+ switch (State) {
+ case hp_error:
+ case hp_eof:
+ break;
+ case hp_read_closed:
+ State = hp_eof;
+ break;
+ case hp_in_header:
+ Header->error = HTTP_HEADER_EOF;
+ State = hp_error;
+ break;
+ case hp_read_alive:
+ case hp_read_chunk:
+ if (HeadRequest)
+ State = hp_eof;
+ else {
+ Header->error = HTTP_MESSAGE_EOF;
+ State = hp_error;
+ }
+ break;
+ case hp_begin_chunk_header:
+ case hp_chunk_header:
+ if (HeadRequest)
+ State = hp_eof;
+ else {
+ Header->error = HTTP_CHUNK_EOF;
+ State = hp_error;
+ }
+ break;
+ }
+ return 0;
+ }
+ while (size) {
+ int ret;
+
+ switch (State) {
+ case hp_error:
+ return 0;
+
+ case hp_eof:
+ return 0;
+
+ case hp_in_header:
+ if ((ret = HeaderParser.Execute(buf, size)) < 0) {
+ Header->error = HTTP_BAD_HEADER_STRING;
+ State = hp_error;
+ return 0;
+ } else if (ret == 2) {
+ Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1);
+ size -= long(HeaderParser.lastchar - (char*)buf + 1);
+ buf = HeaderParser.lastchar + 1;
+ State = CheckHeaders() ? hp_error
: Header->transfer_chunked ? hp_begin_chunk_header
: Header->content_length == 0 ? hp_eof
: Header->content_length > 0 ? hp_read_alive
: hp_read_closed;
- if (State == hp_begin_chunk_header) {
- // unget \n for chunk reader
+ if (State == hp_begin_chunk_header) {
+ // unget \n for chunk reader
buf = (char*)buf - 1;
- size++;
- }
- if (isReader)
- return size;
- } else {
- Header->header_size += size;
- size = 0;
- }
- break;
-
- case hp_read_alive:
- Header->entity_size += size;
+ size++;
+ }
+ if (isReader)
+ return size;
+ } else {
+ Header->header_size += size;
+ size = 0;
+ }
+ break;
+
+ case hp_read_alive:
+ Header->entity_size += size;
if (Header->entity_size >= Header->content_length) {
- State = hp_eof;
- }
+ State = hp_eof;
+ }
- TCheck::CheckDocPart(buf, size, Header);
- if (isReader)
- return size;
- size = 0;
- break;
-
- case hp_read_closed:
- Header->entity_size += size;
- TCheck::CheckDocPart(buf, size, Header);
- if (isReader)
- return size;
- size = 0;
- break;
-
- case hp_begin_chunk_header:
- ChunkParser.Init();
- State = hp_chunk_header;
+ TCheck::CheckDocPart(buf, size, Header);
+ if (isReader)
+ return size;
+ size = 0;
+ break;
+
+ case hp_read_closed:
+ Header->entity_size += size;
+ TCheck::CheckDocPart(buf, size, Header);
+ if (isReader)
+ return size;
+ size = 0;
+ break;
+
+ case hp_begin_chunk_header:
+ ChunkParser.Init();
+ State = hp_chunk_header;
[[fallthrough]];
-
- case hp_chunk_header:
- if ((ret = ChunkParser.Execute(buf, size)) < 0) {
- Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK);
- State = hp_error;
- return 0;
- } else if (ret == 2) {
- Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1);
- size -= long(ChunkParser.lastchar - (char*)buf + 1);
- buf = ChunkParser.lastchar + 1;
- ChunkSize = ChunkParser.chunk_length;
+
+ case hp_chunk_header:
+ if ((ret = ChunkParser.Execute(buf, size)) < 0) {
+ Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK);
+ State = hp_error;
+ return 0;
+ } else if (ret == 2) {
+ Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1);
+ size -= long(ChunkParser.lastchar - (char*)buf + 1);
+ buf = ChunkParser.lastchar + 1;
+ ChunkSize = ChunkParser.chunk_length;
Y_ASSERT(ChunkSize >= 0);
- State = ChunkSize ? hp_read_chunk : hp_eof;
- } else {
- Header->entity_size += size;
- size = 0;
- }
- break;
-
- case hp_read_chunk:
- if (size >= ChunkSize) {
- Header->entity_size += ChunkSize;
- State = hp_begin_chunk_header;
- TCheck::CheckDocPart(buf, ChunkSize, Header);
- if (isReader)
- return ChunkSize;
- size -= ChunkSize;
+ State = ChunkSize ? hp_read_chunk : hp_eof;
+ } else {
+ Header->entity_size += size;
+ size = 0;
+ }
+ break;
+
+ case hp_read_chunk:
+ if (size >= ChunkSize) {
+ Header->entity_size += ChunkSize;
+ State = hp_begin_chunk_header;
+ TCheck::CheckDocPart(buf, ChunkSize, Header);
+ if (isReader)
+ return ChunkSize;
+ size -= ChunkSize;
buf = (char*)buf + ChunkSize;
- } else {
- Header->entity_size += size;
- ChunkSize -= size;
- TCheck::CheckDocPart(buf, size, Header);
- if (isReader)
- return size;
- size = 0;
- }
+ } else {
+ Header->entity_size += size;
+ ChunkSize -= size;
+ TCheck::CheckDocPart(buf, size, Header);
+ if (isReader)
+ return size;
+ size = 0;
+ }
break;
- }
- }
- return size;
- }
-};
-
+ }
+ }
+ return size;
+ }
+};
+
template <class TCheck = TFakeCheck<>>
-class THttpParser: public THttpParserGeneric<0, TCheck> {
- typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG!
-public:
+class THttpParser: public THttpParserGeneric<0, TCheck> {
+ typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG!
+public:
void Init(THttpHeader* H, bool head_request = false) {
- TBaseT::Header = H;
- TBaseT::HeaderParser.Init(TBaseT::Header);
- TBaseT::State = TBaseT::hp_in_header;
- TBaseT::AssumeConnectionClosed = 0;
- TBaseT::HeadRequest = head_request;
- }
-
+ TBaseT::Header = H;
+ TBaseT::HeaderParser.Init(TBaseT::Header);
+ TBaseT::State = TBaseT::hp_in_header;
+ TBaseT::AssumeConnectionClosed = 0;
+ TBaseT::HeadRequest = head_request;
+ }
+
void Parse(void* buf, long size) {
TBaseT::ParseGeneric(buf, size);
- }
-};
-
-class TMemoReader {
-public:
+ }
+};
+
+class TMemoReader {
+public:
int Init(void* buf, long bufsize) {
- Buf = buf;
- Bufsize = bufsize;
- return 0;
- }
+ Buf = buf;
+ Bufsize = bufsize;
+ return 0;
+ }
long Read(void*& buf) {
Y_ASSERT(Bufsize >= 0);
- if (!Bufsize) {
- Bufsize = -1;
- return 0;
- }
- buf = Buf;
- long ret = Bufsize;
- Bufsize = 0;
- return ret;
- }
-
-protected:
- long Bufsize;
+ if (!Bufsize) {
+ Bufsize = -1;
+ return 0;
+ }
+ buf = Buf;
+ long ret = Bufsize;
+ Bufsize = 0;
+ return ret;
+ }
+
+protected:
+ long Bufsize;
void* Buf;
-};
-
-template <class Reader>
-class THttpReader: public THttpParserGeneric<1>, public Reader {
- typedef THttpParserGeneric<1> TBaseT;
+};
+
+template <class Reader>
+class THttpReader: public THttpParserGeneric<1>, public Reader {
+ typedef THttpParserGeneric<1> TBaseT;
-public:
+public:
using TBaseT::AssumeConnectionClosed;
- using TBaseT::Header;
- using TBaseT::ParseGeneric;
- using TBaseT::State;
-
+ using TBaseT::Header;
+ using TBaseT::ParseGeneric;
+ using TBaseT::State;
+
int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) {
- Header = H;
- Eoferr = 1;
- Size = 0;
+ Header = H;
+ Eoferr = 1;
+ Size = 0;
AssumeConnectionClosed = assumeConnectionClosed;
HeadRequest = headRequest;
- return parsHeader ? ParseHeader() : SkipHeader();
- }
-
+ return parsHeader ? ParseHeader() : SkipHeader();
+ }
+
long Read(void*& buf) {
- long Chunk;
- do {
- if (!Size) {
- if (Eoferr != 1)
- return Eoferr;
- else if ((Size = (long)Reader::Read(Ptr)) < 0) {
- Header->error = HTTP_CONNECTION_LOST;
- return Eoferr = -1;
- }
- }
+ long Chunk;
+ do {
+ if (!Size) {
+ if (Eoferr != 1)
+ return Eoferr;
+ else if ((Size = (long)Reader::Read(Ptr)) < 0) {
+ Header->error = HTTP_CONNECTION_LOST;
+ return Eoferr = -1;
+ }
+ }
Chunk = ParseGeneric(Ptr, Size);
- buf = Ptr;
- Ptr = (char*)Ptr + Chunk;
- Size -= Chunk;
+ buf = Ptr;
+ Ptr = (char*)Ptr + Chunk;
+ Size -= Chunk;
if (State == hp_eof) {
Size = 0;
- Eoferr = 0;
+ Eoferr = 0;
} else if (State == hp_error)
- return Eoferr = -1;
- } while (!Chunk);
- return Chunk;
- }
-
-protected:
- int ParseHeader() {
- HeaderParser.Init(Header);
- State = hp_in_header;
- while (State == hp_in_header) {
- if ((Size = (long)Reader::Read(Ptr)) < 0)
- return Eoferr = -1;
+ return Eoferr = -1;
+ } while (!Chunk);
+ return Chunk;
+ }
+
+protected:
+ int ParseHeader() {
+ HeaderParser.Init(Header);
+ State = hp_in_header;
+ while (State == hp_in_header) {
+ if ((Size = (long)Reader::Read(Ptr)) < 0)
+ return Eoferr = -1;
ParseGeneric(Ptr, Size);
- }
- if (State == hp_error)
- return Eoferr = -1;
- if (State == hp_eof)
- Eoferr = 0;
- return 0;
- }
-
- int SkipHeader() {
- long hdrsize = Header->header_size;
- while (hdrsize) {
- if ((Size = (long)Reader::Read(Ptr)) <= 0)
- return Eoferr = -1;
- if (Size >= hdrsize) {
- Size -= hdrsize;
- Ptr = (char*)Ptr + hdrsize;
- break;
- }
- hdrsize -= Size;
- }
- State = Header->transfer_chunked ? hp_begin_chunk_header
+ }
+ if (State == hp_error)
+ return Eoferr = -1;
+ if (State == hp_eof)
+ Eoferr = 0;
+ return 0;
+ }
+
+ int SkipHeader() {
+ long hdrsize = Header->header_size;
+ while (hdrsize) {
+ if ((Size = (long)Reader::Read(Ptr)) <= 0)
+ return Eoferr = -1;
+ if (Size >= hdrsize) {
+ Size -= hdrsize;
+ Ptr = (char*)Ptr + hdrsize;
+ break;
+ }
+ hdrsize -= Size;
+ }
+ State = Header->transfer_chunked ? hp_begin_chunk_header
: Header->content_length == 0 ? hp_eof
: Header->content_length > 0 ? hp_read_alive
: hp_read_closed;
- Header->entity_size = 0;
- if (State == hp_eof)
- Eoferr = 0;
- else if (State == hp_begin_chunk_header) {
- // unget \n for chunk reader
- Ptr = (char*)Ptr - 1;
- ++Size;
- }
- return 0;
- }
-
+ Header->entity_size = 0;
+ if (State == hp_eof)
+ Eoferr = 0;
+ else if (State == hp_begin_chunk_header) {
+ // unget \n for chunk reader
+ Ptr = (char*)Ptr - 1;
+ ++Size;
+ }
+ return 0;
+ }
+
void* Ptr;
- long Size;
+ long Size;
int Eoferr;
-};
+};
diff --git a/library/cpp/http/fetch/httpzreader.h b/library/cpp/http/fetch/httpzreader.h
index d951d21e9a..68eb00853d 100644
--- a/library/cpp/http/fetch/httpzreader.h
+++ b/library/cpp/http/fetch/httpzreader.h
@@ -1,55 +1,55 @@
#pragma once
-
+
#include "httpheader.h"
#include "httpparser.h"
#include "exthttpcodes.h"
-
+
#include <util/system/defaults.h>
#include <util/generic/yexception.h>
-#include <contrib/libs/zlib/zlib.h>
-
+#include <contrib/libs/zlib/zlib.h>
+
#include <errno.h>
-#ifndef ENOTSUP
+#ifndef ENOTSUP
#define ENOTSUP 45
-#endif
-
-template <class Reader>
-class TCompressedHttpReader: public THttpReader<Reader> {
- typedef THttpReader<Reader> TBase;
+#endif
+
+template <class Reader>
+class TCompressedHttpReader: public THttpReader<Reader> {
+ typedef THttpReader<Reader> TBase;
-public:
+public:
using TBase::AssumeConnectionClosed;
- using TBase::Header;
- using TBase::ParseGeneric;
- using TBase::State;
-
+ using TBase::Header;
+ using TBase::ParseGeneric;
+ using TBase::State;
+
static constexpr size_t DefaultBufSize = 64 << 10;
static constexpr unsigned int DefaultWinSize = 15;
- TCompressedHttpReader()
- : CompressedInput(false)
- , BufSize(0)
- , CurContSize(0)
- , MaxContSize(0)
+ TCompressedHttpReader()
+ : CompressedInput(false)
+ , BufSize(0)
+ , CurContSize(0)
+ , MaxContSize(0)
, Buf(nullptr)
- , ZErr(0)
+ , ZErr(0)
, ConnectionClosed(0)
, IgnoreTrailingGarbage(true)
- {
- memset(&Stream, 0, sizeof(Stream));
- }
-
- ~TCompressedHttpReader() {
- ClearStream();
-
- if (Buf) {
- free(Buf);
+ {
+ memset(&Stream, 0, sizeof(Stream));
+ }
+
+ ~TCompressedHttpReader() {
+ ClearStream();
+
+ if (Buf) {
+ free(Buf);
Buf = nullptr;
- }
- }
-
+ }
+ }
+
void SetConnectionClosed(int cc) {
ConnectionClosed = cc;
}
@@ -66,196 +66,196 @@ public:
const unsigned int winSize = DefaultWinSize,
bool headRequest = false)
{
- ZErr = 0;
- CurContSize = 0;
- MaxContSize = maxContSize;
-
+ ZErr = 0;
+ CurContSize = 0;
+ MaxContSize = maxContSize;
+
int ret = TBase::Init(H, parsHeader, ConnectionClosed, headRequest);
- if (ret)
- return ret;
-
- ret = SetCompression(H->compression_method, bufSize, winSize);
- return ret;
- }
-
+ if (ret)
+ return ret;
+
+ ret = SetCompression(H->compression_method, bufSize, winSize);
+ return ret;
+ }
+
long Read(void*& buf) {
- if (!CompressedInput) {
- long res = TBase::Read(buf);
- if (res > 0) {
- CurContSize += (size_t)res;
- if (CurContSize > MaxContSize) {
- ZErr = E2BIG;
- return -1;
- }
- }
- return res;
- }
-
- while (true) {
- if (Stream.avail_in == 0) {
+ if (!CompressedInput) {
+ long res = TBase::Read(buf);
+ if (res > 0) {
+ CurContSize += (size_t)res;
+ if (CurContSize > MaxContSize) {
+ ZErr = E2BIG;
+ return -1;
+ }
+ }
+ return res;
+ }
+
+ while (true) {
+ if (Stream.avail_in == 0) {
void* tmpin = Stream.next_in;
long res = TBase::Read(tmpin);
Stream.next_in = (Bytef*)tmpin;
if (res <= 0)
return res;
Stream.avail_in = (uInt)res;
- }
-
- Stream.next_out = Buf;
- Stream.avail_out = (uInt)BufSize;
- buf = Buf;
-
- int err = inflate(&Stream, Z_SYNC_FLUSH);
-
+ }
+
+ Stream.next_out = Buf;
+ Stream.avail_out = (uInt)BufSize;
+ buf = Buf;
+
+ int err = inflate(&Stream, Z_SYNC_FLUSH);
+
//Y_ASSERT(Stream.avail_in == 0);
-
- switch (err) {
- case Z_OK:
- // there is no data in next_out yet
- if (BufSize == Stream.avail_out)
- continue;
+
+ switch (err) {
+ case Z_OK:
+ // there is no data in next_out yet
+ if (BufSize == Stream.avail_out)
+ continue;
[[fallthrough]]; // don't break or return; continue with Z_STREAM_END case
-
- case Z_STREAM_END:
- if (Stream.total_out > MaxContSize) {
- ZErr = E2BIG;
- return -1;
- }
+
+ case Z_STREAM_END:
+ if (Stream.total_out > MaxContSize) {
+ ZErr = E2BIG;
+ return -1;
+ }
if (!IgnoreTrailingGarbage && BufSize == Stream.avail_out && Stream.avail_in > 0) {
Header->error = EXT_HTTP_GZIPERROR;
ZErr = EFAULT;
Stream.msg = (char*)"trailing garbage";
return -1;
}
- return long(BufSize - Stream.avail_out);
-
- case Z_NEED_DICT:
- case Z_DATA_ERROR:
- Header->error = EXT_HTTP_GZIPERROR;
- ZErr = EFAULT;
- return -1;
-
- case Z_MEM_ERROR:
- ZErr = ENOMEM;
- return -1;
-
- default:
- ZErr = EINVAL;
- return -1;
- }
- }
-
- return -1;
- }
-
+ return long(BufSize - Stream.avail_out);
+
+ case Z_NEED_DICT:
+ case Z_DATA_ERROR:
+ Header->error = EXT_HTTP_GZIPERROR;
+ ZErr = EFAULT;
+ return -1;
+
+ case Z_MEM_ERROR:
+ ZErr = ENOMEM;
+ return -1;
+
+ default:
+ ZErr = EINVAL;
+ return -1;
+ }
+ }
+
+ return -1;
+ }
+
const char* ZMsg() const {
- return Stream.msg;
- }
-
- int ZError() const {
- return ZErr;
- }
-
- size_t GetCurContSize() const {
- return CompressedInput ? Stream.total_out : CurContSize;
- }
-
-protected:
- int SetCompression(const int compression, const size_t bufSize,
- const unsigned int winSize) {
- ClearStream();
-
- int winsize = winSize;
- switch ((enum HTTP_COMPRESSION)compression) {
- case HTTP_COMPRESSION_UNSET:
- case HTTP_COMPRESSION_IDENTITY:
- CompressedInput = false;
- return 0;
- case HTTP_COMPRESSION_GZIP:
- CompressedInput = true;
+ return Stream.msg;
+ }
+
+ int ZError() const {
+ return ZErr;
+ }
+
+ size_t GetCurContSize() const {
+ return CompressedInput ? Stream.total_out : CurContSize;
+ }
+
+protected:
+ int SetCompression(const int compression, const size_t bufSize,
+ const unsigned int winSize) {
+ ClearStream();
+
+ int winsize = winSize;
+ switch ((enum HTTP_COMPRESSION)compression) {
+ case HTTP_COMPRESSION_UNSET:
+ case HTTP_COMPRESSION_IDENTITY:
+ CompressedInput = false;
+ return 0;
+ case HTTP_COMPRESSION_GZIP:
+ CompressedInput = true;
winsize += 16; // 16 indicates gzip, see zlib.h
- break;
- case HTTP_COMPRESSION_DEFLATE:
- CompressedInput = true;
- winsize = -winsize; // negative indicates raw deflate stream, see zlib.h
- break;
- case HTTP_COMPRESSION_COMPRESS:
- case HTTP_COMPRESSION_ERROR:
- default:
- CompressedInput = false;
- ZErr = ENOTSUP;
- return -1;
- }
-
- if (bufSize != BufSize) {
- if (Buf)
- free(Buf);
- Buf = (ui8*)malloc(bufSize);
- if (!Buf) {
- ZErr = ENOMEM;
- return -1;
- }
- BufSize = bufSize;
- }
-
- int err = inflateInit2(&Stream, winsize);
- switch (err) {
- case Z_OK:
- Stream.total_in = 0;
- Stream.total_out = 0;
- Stream.avail_in = 0;
- return 0;
-
- case Z_DATA_ERROR: // never happens, see zlib.h
- CompressedInput = false;
- ZErr = EFAULT;
- return -1;
-
- case Z_MEM_ERROR:
- CompressedInput = false;
- ZErr = ENOMEM;
- return -1;
-
- default:
- CompressedInput = false;
- ZErr = EINVAL;
- return -1;
- }
- }
-
- void ClearStream() {
- if (CompressedInput) {
- inflateEnd(&Stream);
- CompressedInput = false;
- }
- }
-
- z_stream Stream;
- bool CompressedInput;
- size_t BufSize;
- size_t CurContSize, MaxContSize;
- ui8* Buf;
- int ZErr;
+ break;
+ case HTTP_COMPRESSION_DEFLATE:
+ CompressedInput = true;
+ winsize = -winsize; // negative indicates raw deflate stream, see zlib.h
+ break;
+ case HTTP_COMPRESSION_COMPRESS:
+ case HTTP_COMPRESSION_ERROR:
+ default:
+ CompressedInput = false;
+ ZErr = ENOTSUP;
+ return -1;
+ }
+
+ if (bufSize != BufSize) {
+ if (Buf)
+ free(Buf);
+ Buf = (ui8*)malloc(bufSize);
+ if (!Buf) {
+ ZErr = ENOMEM;
+ return -1;
+ }
+ BufSize = bufSize;
+ }
+
+ int err = inflateInit2(&Stream, winsize);
+ switch (err) {
+ case Z_OK:
+ Stream.total_in = 0;
+ Stream.total_out = 0;
+ Stream.avail_in = 0;
+ return 0;
+
+ case Z_DATA_ERROR: // never happens, see zlib.h
+ CompressedInput = false;
+ ZErr = EFAULT;
+ return -1;
+
+ case Z_MEM_ERROR:
+ CompressedInput = false;
+ ZErr = ENOMEM;
+ return -1;
+
+ default:
+ CompressedInput = false;
+ ZErr = EINVAL;
+ return -1;
+ }
+ }
+
+ void ClearStream() {
+ if (CompressedInput) {
+ inflateEnd(&Stream);
+ CompressedInput = false;
+ }
+ }
+
+ z_stream Stream;
+ bool CompressedInput;
+ size_t BufSize;
+ size_t CurContSize, MaxContSize;
+ ui8* Buf;
+ int ZErr;
int ConnectionClosed;
bool IgnoreTrailingGarbage;
-};
-
+};
+
class zlib_exception: public yexception {
-};
-
-template <class Reader>
-class SCompressedHttpReader: public TCompressedHttpReader<Reader> {
- typedef TCompressedHttpReader<Reader> TBase;
-
-public:
- using TBase::ZError;
- using TBase::ZMsg;
-
- SCompressedHttpReader()
+};
+
+template <class Reader>
+class SCompressedHttpReader: public TCompressedHttpReader<Reader> {
+ typedef TCompressedHttpReader<Reader> TBase;
+
+public:
+ using TBase::ZError;
+ using TBase::ZMsg;
+
+ SCompressedHttpReader()
: TBase()
{
}
-
+
int Init(
THttpHeader* H,
int parsHeader,
@@ -265,31 +265,31 @@ public:
bool headRequest = false)
{
int ret = TBase::Init(H, parsHeader, maxContSize, bufSize, winSize, headRequest);
- return (int)HandleRetValue((long)ret);
- }
-
+ return (int)HandleRetValue((long)ret);
+ }
+
long Read(void*& buf) {
- long ret = TBase::Read(buf);
- return HandleRetValue(ret);
- }
-
-protected:
- long HandleRetValue(long ret) {
- switch (ZError()) {
- case 0:
- return ret;
- case ENOMEM:
+ long ret = TBase::Read(buf);
+ return HandleRetValue(ret);
+ }
+
+protected:
+ long HandleRetValue(long ret) {
+ switch (ZError()) {
+ case 0:
+ return ret;
+ case ENOMEM:
ythrow yexception() << "SCompressedHttpReader: not enough memory";
- case EINVAL:
+ case EINVAL:
ythrow yexception() << "SCompressedHttpReader: zlib error: " << ZMsg();
- case ENOTSUP:
+ case ENOTSUP:
ythrow yexception() << "SCompressedHttpReader: unsupported compression method";
- case EFAULT:
+ case EFAULT:
ythrow zlib_exception() << "SCompressedHttpReader: " << ZMsg();
- case E2BIG:
+ case E2BIG:
ythrow zlib_exception() << "SCompressedHttpReader: Content exceeds maximum length";
- default:
+ default:
ythrow yexception() << "SCompressedHttpReader: unknown error";
- }
- }
-};
+ }
+ }
+};
diff --git a/library/cpp/http/fetch/ya.make b/library/cpp/http/fetch/ya.make
index aa8b073a8c..7737127463 100644
--- a/library/cpp/http/fetch/ya.make
+++ b/library/cpp/http/fetch/ya.make
@@ -1,5 +1,5 @@
LIBRARY()
-
+
OWNER(
g:zora
)
@@ -14,25 +14,25 @@ PEERDIR(
library/cpp/uri
)
-SRCS(
- http_digest.cpp
- http_socket.cpp
+SRCS(
+ http_digest.cpp
+ http_socket.cpp
httpheader.cpp
- httpload.cpp
- exthttpcodes.cpp
+ httpload.cpp
+ exthttpcodes.cpp
httpfsm.rl6
- httpagent.h
- httpfetcher.h
- httpheader.h
- httpparser.h
- httpzreader.h
+ httpagent.h
+ httpfetcher.h
+ httpheader.h
+ httpparser.h
+ httpzreader.h
sockhandler.h
-)
-
+)
+
GENERATE_ENUM_SERIALIZATION(httpheader.h)
SET(RAGEL6_FLAGS -CF1)
-END()
+END()
RECURSE_FOR_TESTS(ut)