diff options
| author | leo <[email protected]> | 2022-02-10 16:46:40 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:46:40 +0300 | 
| commit | 99609724f661f7e21d1cb08e8d80e87c3632fdb3 (patch) | |
| tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/http/fetch | |
| parent | 980edcd3304699edf9d4e4d6a656e585028e2a72 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
| -rw-r--r-- | library/cpp/http/fetch/exthttpcodes.cpp | 44 | ||||
| -rw-r--r-- | library/cpp/http/fetch/exthttpcodes.h | 42 | ||||
| -rw-r--r-- | library/cpp/http/fetch/http_digest.cpp | 238 | ||||
| -rw-r--r-- | library/cpp/http/fetch/http_digest.h | 32 | ||||
| -rw-r--r-- | library/cpp/http/fetch/http_socket.cpp | 242 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpfetcher.h | 212 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpfsm.h | 74 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 434 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpheader.h | 84 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpload.cpp | 376 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpload.h | 316 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpparser.h | 606 | ||||
| -rw-r--r-- | library/cpp/http/fetch/httpzreader.h | 440 | ||||
| -rw-r--r-- | library/cpp/http/fetch/ya.make | 28 | 
14 files changed, 1584 insertions, 1584 deletions
diff --git a/library/cpp/http/fetch/exthttpcodes.cpp b/library/cpp/http/fetch/exthttpcodes.cpp index ac0d3b359da..acc05650c89 100644 --- a/library/cpp/http/fetch/exthttpcodes.cpp +++ b/library/cpp/http/fetch/exthttpcodes.cpp @@ -1,18 +1,18 @@  #include "exthttpcodes.h" -#include <cstring>  -  +#include <cstring> +  const ui16 CrazyServer = ShouldDelete | MarkSuspect; -  +  struct http_flag {      ui16 http;      ui16 flag;  }; -static http_flag HTTP_FLAG[] = {  +static http_flag HTTP_FLAG[] = {      {HTTP_CONTINUE, MarkSuspect},            // 100      {HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101      {HTTP_PROCESSING, CrazyServer},          // 102 -  +      {HTTP_OK, ShouldReindex},                            // 200      {HTTP_CREATED, CrazyServer},                         // 201      {HTTP_ACCEPTED, ShouldDelete},                       // 202 @@ -23,7 +23,7 @@ static http_flag HTTP_FLAG[] = {      {HTTP_MULTI_STATUS, CrazyServer},                    // 207      {HTTP_ALREADY_REPORTED, CrazyServer},                // 208      {HTTP_IM_USED, CrazyServer},                         // 226 -  +      {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete},                  // 300      {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir},  // 301      {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir},              // 302 @@ -32,7 +32,7 @@ static http_flag HTTP_FLAG[] = {      {HTTP_USE_PROXY, ShouldDelete},                                      // 305      {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307      {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308 -  +      {HTTP_BAD_REQUEST, CrazyServer},                                       // 400      {HTTP_UNAUTHORIZED, ShouldDelete},                                     // 401      {HTTP_PAYMENT_REQUIRED, ShouldDelete},                                 // 402 @@ -53,7 +53,7 @@ static http_flag HTTP_FLAG[] = {      {HTTP_EXPECTATION_FAILED, ShouldDelete},                               // 417      {HTTP_I_AM_A_TEAPOT, CrazyServer},                                     // 418      {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete},                           // 419 -  +      {HTTP_MISDIRECTED_REQUEST, CrazyServer},                                // 421      {HTTP_UNPROCESSABLE_ENTITY, CrazyServer},                               // 422      {HTTP_LOCKED, ShouldDelete},                                            // 423 @@ -62,7 +62,7 @@ static http_flag HTTP_FLAG[] = {      {HTTP_PRECONDITION_REQUIRED, ShouldDelete},                             // 428      {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429      {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete},                     // 451 -  +      {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect},                                // 500      {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect},                  // 501      {HTTP_BAD_GATEWAY, MarkSuspect},                                          // 502 @@ -116,7 +116,7 @@ static http_flag HTTP_FLAG[] = {      {HTTP_FETCHER_BAD_RESPONSE, 0},                                      // 1040      {HTTP_FETCHER_MB_ERROR, 0},                                          // 1041      {HTTP_SSL_CERT_ERROR, 0},                                            // 1042 -  +      // Custom (replace HTTP 200/304)      {EXT_HTTP_MIRRMOVE, 0},                                          // 2000      {EXT_HTTP_MANUAL_DELETE, ShouldDelete},                          // 2001 @@ -142,34 +142,34 @@ static http_flag HTTP_FLAG[] = {      {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete},                         // 2024      {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025      {0, 0}}; -  +  static ui16* prepare_flags(http_flag* arg) { -    static ui16 flags[EXT_HTTP_CODE_MAX];  +    static ui16 flags[EXT_HTTP_CODE_MAX];      http_flag* ptr; -    size_t i;  -  +    size_t i; +      // устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше      for (i = 0; i < EXT_HTTP_CODE_MAX; ++i) -        flags[i] = CrazyServer;  -  +        flags[i] = CrazyServer; +      // устанавливаем флаги для перечисленных кодов      for (ptr = arg; ptr->http; ++ptr)          flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; -  +      // для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их      // всем кодам не перечисленным в таблице выше      for (size_t group = 0; group < 1000; group += 100)          for (size_t j = group + 1; j < group + 100; ++j)              flags[j] = flags[group]; -  +      // предыдущий цикл затер некоторые флаги перечисленные в таблице выше      // восстанавливаем их      for (ptr = arg; ptr->http; ++ptr)          flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; -  -    return flags;  -}  -  + +    return flags; +} +  ui16* http2status = prepare_flags(HTTP_FLAG);  TStringBuf ExtHttpCodeStr(int code) noexcept { diff --git a/library/cpp/http/fetch/exthttpcodes.h b/library/cpp/http/fetch/exthttpcodes.h index 88bfe8d8299..6b525052cd9 100644 --- a/library/cpp/http/fetch/exthttpcodes.h +++ b/library/cpp/http/fetch/exthttpcodes.h @@ -1,9 +1,9 @@  #pragma once -  -#include <util/system/defaults.h>  + +#include <util/system/defaults.h>  #include <library/cpp/http/misc/httpcodes.h> -  -enum ExtHttpCodes {  + +enum ExtHttpCodes {      // Custom      HTTP_EXTENDED = 1000,      HTTP_BAD_RESPONSE_HEADER = 1000, @@ -50,8 +50,8 @@ enum ExtHttpCodes {      HTTP_FETCHER_MB_ERROR = 1041,      HTTP_SSL_CERT_ERROR = 1042,      HTTP_PROXY_REQUEST_CANCELED = 1051, -  -    // Custom (replace HTTP 200/304)  + +    // Custom (replace HTTP 200/304)      EXT_HTTP_EXT_SUCCESS_BEGIN = 2000, // to check if code variable is in success interval      EXT_HTTP_MIRRMOVE = 2000,      EXT_HTTP_MANUAL_DELETE = 2001, @@ -104,22 +104,22 @@ enum ExtHttpCodes {      EXT_HTTP_WRONGMULTILANG = 3023,      EXT_HTTP_SOFTMIRRORS = 3024,      EXT_HTTP_BIGLEVEL = 3025, -  -    // fast robot codes  -  + +    // fast robot codes +      EXT_HTTP_FASTHOPS = 4000,      EXT_HTTP_NODOC = 4001,      EXT_HTTP_MAX -};  -  -enum HttpFlags {  -    // connection  -    ShouldDisconnect = 1,  +}; + +enum HttpFlags { +    // connection +    ShouldDisconnect = 1,      ShouldRetry = 2,      // UNUSED 4 -  -    // indexer  + +    // indexer      ShouldReindex = 8,      ShouldDelete = 16,      CheckLocation = 32, @@ -129,13 +129,13 @@ enum HttpFlags {      // UNUSED 512      MoveRedir = 1024,      CanBeFake = 2048, -};  -  +}; +  const size_t EXT_HTTP_CODE_MAX = 1 << 12; -  -static inline int Http2Status(int code) {  + +static inline int Http2Status(int code) {      extern ui16* http2status;      return http2status[code & (EXT_HTTP_CODE_MAX - 1)]; -}  +}  TStringBuf ExtHttpCodeStr(int code) noexcept; diff --git a/library/cpp/http/fetch/http_digest.cpp b/library/cpp/http/fetch/http_digest.cpp index 1c8bc6f449b..1eaa02b7f24 100644 --- a/library/cpp/http/fetch/http_digest.cpp +++ b/library/cpp/http/fetch/http_digest.cpp @@ -3,204 +3,204 @@  #include <library/cpp/digest/md5/md5.h>  #include <util/stream/output.h>  #include <util/stream/str.h> -  -/************************************************************/  -/************************************************************/  -static const char* WWW_PREFIX = "Authorization: Digest ";  -  -/************************************************************/  + +/************************************************************/ +/************************************************************/ +static const char* WWW_PREFIX = "Authorization: Digest "; + +/************************************************************/  httpDigestHandler::httpDigestHandler()      : User_(nullptr)      , Password_(nullptr)      , Nonce_(nullptr)      , NonceCount_(0)      , HeaderInstruction_(nullptr) -{  -}  -  -/************************************************************/  +{ +} + +/************************************************************/  httpDigestHandler::~httpDigestHandler() { -    clear();  -}  -  -/************************************************************/  +    clear(); +} + +/************************************************************/  void httpDigestHandler::clear() {      free(Nonce_);      free(HeaderInstruction_);      User_ = Password_ = nullptr;      Nonce_ = HeaderInstruction_ = nullptr;      NonceCount_ = 0; -}  -  -/************************************************************/  +} + +/************************************************************/  void httpDigestHandler::setAuthorization(const char* user, const char* password) { -    clear();  +    clear();      if (user && password) {          User_ = user;          Password_ = password; -    }  -}  -  -/************************************************************/  +    } +} + +/************************************************************/  const char* httpDigestHandler::getHeaderInstruction() const {      return HeaderInstruction_; -}  -  -/************************************************************/  +} + +/************************************************************/  void httpDigestHandler::generateCNonce(char* outCNonce) { -    if (!*outCNonce)  +    if (!*outCNonce)          sprintf(outCNonce, "%ld", (long)time(nullptr)); -}  -  -/************************************************************/  +} + +/************************************************************/  inline void addMD5(MD5& ctx, const char* value) { -    ctx.Update((const unsigned char*)(value), strlen(value));  -}  -  +    ctx.Update((const unsigned char*)(value), strlen(value)); +} +  inline void addMD5(MD5& ctx, const char* value, int len) { -    ctx.Update((const unsigned char*)(value), len);  -}  -  +    ctx.Update((const unsigned char*)(value), len); +} +  inline void addMD5Sep(MD5& ctx) { -    addMD5(ctx, ":", 1);  -}  -  -/************************************************************/  -/* calculate H(A1) as per spec */  +    addMD5(ctx, ":", 1); +} + +/************************************************************/ +/* calculate H(A1) as per spec */  void httpDigestHandler::digestCalcHA1(const THttpAuthHeader& hd,                                        char* outSessionKey,                                        char* outCNonce) { -    MD5 ctx;  -    ctx.Init();  +    MD5 ctx; +    ctx.Init();      addMD5(ctx, User_);      addMD5Sep(ctx);      addMD5(ctx, hd.realm);      addMD5Sep(ctx);      addMD5(ctx, Password_); -  +      if (hd.algorithm == 1) { //MD5-sess -        unsigned char digest[16];  -        ctx.Final(digest);  -  -        generateCNonce(outCNonce);  -  -        ctx.Init();  -        ctx.Update(digest, 16);  +        unsigned char digest[16]; +        ctx.Final(digest); + +        generateCNonce(outCNonce); + +        ctx.Init(); +        ctx.Update(digest, 16);          addMD5Sep(ctx);          addMD5(ctx, hd.nonce);          addMD5Sep(ctx);          addMD5(ctx, outCNonce); -        ctx.End(outSessionKey);  -    }  -  +        ctx.End(outSessionKey); +    } +      ctx.End(outSessionKey); -};  -  -/************************************************************/  -/* calculate request-digest/response-digest as per HTTP Digest spec */  +}; + +/************************************************************/ +/* calculate request-digest/response-digest as per HTTP Digest spec */  void httpDigestHandler::digestCalcResponse(const THttpAuthHeader& hd,                                             const char* path,                                             const char* method,                                             const char* nonceCount,                                             char* outResponse,                                             char* outCNonce) { -    char HA1[33];  -    digestCalcHA1(hd, HA1, outCNonce);  -  -    char HA2[33];  -    MD5 ctx;  -    ctx.Init();  +    char HA1[33]; +    digestCalcHA1(hd, HA1, outCNonce); + +    char HA2[33]; +    MD5 ctx; +    ctx.Init();      addMD5(ctx, method);      addMD5Sep(ctx);      addMD5(ctx, path); -    //ignore auth-int  -    ctx.End(HA2);  -  -    ctx.Init();  +    //ignore auth-int +    ctx.End(HA2); + +    ctx.Init();      addMD5(ctx, HA1, 32);      addMD5Sep(ctx);      addMD5(ctx, Nonce_);      addMD5Sep(ctx); -  +      if (hd.qop_auth) { -        if (!*outCNonce)  -            generateCNonce(outCNonce);  -  +        if (!*outCNonce) +            generateCNonce(outCNonce); +          addMD5(ctx, nonceCount, 8);          addMD5Sep(ctx);          addMD5(ctx, outCNonce);          addMD5Sep(ctx);          addMD5(ctx, "auth", 4);          addMD5Sep(ctx); -    }  +    }      addMD5(ctx, HA2, 32); -    ctx.End(outResponse);  -}  -  -/************************************************************/  +    ctx.End(outResponse); +} + +/************************************************************/  bool httpDigestHandler::processHeader(const THttpAuthHeader* header,                                        const char* path,                                        const char* method,                                        const char* cnonce) {      if (!User_ || !header || !header->use_auth || !header->realm || !header->nonce) -        return false;  -  +        return false; +      if (Nonce_) {          if (strcmp(Nonce_, header->nonce)) {              free(Nonce_);              Nonce_ = nullptr;              NonceCount_ = 0; -        }  -    }  +        } +    }      if (!Nonce_) {          Nonce_ = strdup(header->nonce);          NonceCount_ = 0; -    }  +    }      free(HeaderInstruction_);      HeaderInstruction_ = nullptr;      NonceCount_++; -  -    char nonceCount[20];  + +    char nonceCount[20];      sprintf(nonceCount, "%08d", NonceCount_); -  -    char CNonce[50];  -    if (cnonce)  -        strcpy(CNonce, cnonce);  -    else  + +    char CNonce[50]; +    if (cnonce) +        strcpy(CNonce, cnonce); +    else          CNonce[0] = 0; -  -    char response[33];  + +    char response[33];      digestCalcResponse(*header, path, method, nonceCount, response, CNonce); -  -    //digest-response  = 1#( username | realm | nonce | digest-uri  -    //                   | response | [ algorithm ] | [cnonce] |  -    //                   [opaque] | [message-qop] |  -    //                   [nonce-count]  | [auth-param] )  -  -    TStringStream out;  + +    //digest-response  = 1#( username | realm | nonce | digest-uri +    //                   | response | [ algorithm ] | [cnonce] | +    //                   [opaque] | [message-qop] | +    //                   [nonce-count]  | [auth-param] ) + +    TStringStream out;      out << WWW_PREFIX << "username=\"" << User_ << "\""; -    out << ", realm=\"" << header->realm << "\"";  -    out << ", nonce=\"" << header->nonce << "\"";  -    out << ", uri=\"" << path << "\"";  +    out << ", realm=\"" << header->realm << "\""; +    out << ", nonce=\"" << header->nonce << "\""; +    out << ", uri=\"" << path << "\"";      if (header->algorithm == 1) -        out << ", algorithm=MD5-sess";  -    else  -        out << ", algorithm=MD5";  -    if (header->qop_auth)  -        out << ", qop=auth";  -    out << ", nc=" << nonceCount;  -    if (CNonce[0])  -        out << ", cnonce=\"" << CNonce << "\"";  -    out << ", response=\"" << response << "\"";  -    if (header->opaque)  -        out << ", opaque=\"" << header->opaque << "\"";  -    out << "\r\n";  -  +        out << ", algorithm=MD5-sess"; +    else +        out << ", algorithm=MD5"; +    if (header->qop_auth) +        out << ", qop=auth"; +    out << ", nc=" << nonceCount; +    if (CNonce[0]) +        out << ", cnonce=\"" << CNonce << "\""; +    out << ", response=\"" << response << "\""; +    if (header->opaque) +        out << ", opaque=\"" << header->opaque << "\""; +    out << "\r\n"; +      TString s_out = out.Str();      HeaderInstruction_ = strdup(s_out.c_str()); -  -    return true;  -}  -  -/************************************************************/  -/************************************************************/  + +    return true; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/http_digest.h b/library/cpp/http/fetch/http_digest.h index 018107c2e45..3b1872d70b5 100644 --- a/library/cpp/http/fetch/http_digest.h +++ b/library/cpp/http/fetch/http_digest.h @@ -1,10 +1,10 @@  #pragma once -  +  #include "httpheader.h" -#include <util/system/compat.h>  +#include <util/system/compat.h>  #include <library/cpp/http/misc/httpcodes.h> -  +  class httpDigestHandler {  protected:      const char* User_; @@ -12,36 +12,36 @@ protected:      char* Nonce_;      int NonceCount_;      char* HeaderInstruction_; -  -    void clear();  -  + +    void clear(); +      void generateCNonce(char* outCNonce); -  +      void digestCalcHA1(const THttpAuthHeader& hd,                         char* outSessionKey,                         char* outCNonce); -  +      void digestCalcResponse(const THttpAuthHeader& hd,                              const char* method,                              const char* path,                              const char* nonceCount,                              char* outResponse,                              char* outCNonce); -  +  public: -    httpDigestHandler();  -    ~httpDigestHandler();  -  +    httpDigestHandler(); +    ~httpDigestHandler(); +      void setAuthorization(const char* user,                            const char* password);      bool processHeader(const THttpAuthHeader* header,                         const char* path,                         const char* method,                         const char* cnonce = nullptr); -  +      bool empty() const {          return (!User_); -    }  -  +    } +      const char* getHeaderInstruction() const; -};  +}; diff --git a/library/cpp/http/fetch/http_socket.cpp b/library/cpp/http/fetch/http_socket.cpp index 870d9274895..1524ef04a80 100644 --- a/library/cpp/http/fetch/http_socket.cpp +++ b/library/cpp/http/fetch/http_socket.cpp @@ -1,30 +1,30 @@ -#include "httpload.h"  -#include "http_digest.h"  -  -/************************************************************/  -  -#ifdef USE_GNUTLS  -  -#include <gcrypt.h>  -#include <gnutls/gnutls.h>  +#include "httpload.h" +#include "http_digest.h" + +/************************************************************/ + +#ifdef USE_GNUTLS + +#include <gcrypt.h> +#include <gnutls/gnutls.h>  #include <util/network/init.h> -#include <util/network/socket.h>  -#include <util/system/mutex.h>  -  -/********************************************************/  -// HTTPS handler is used as implementation of  -// socketAbstractHandler for work through HTTPS protocol  -  +#include <util/network/socket.h> +#include <util/system/mutex.h> + +/********************************************************/ +// HTTPS handler is used as implementation of +// socketAbstractHandler for work through HTTPS protocol +  class socketSecureHandler: public socketRegularHandler {  protected:      bool IsValid_;      gnutls_session Session_;      gnutls_certificate_credentials Credits_; -  +  public:      socketSecureHandler();      virtual ~socketSecureHandler(); -  +      virtual bool Good();      virtual int Connect(const TAddrList& addrs, TDuration Timeout);      virtual void Disconnect(); @@ -32,175 +32,175 @@ public:      virtual bool send(const char* message, ssize_t messlen);      virtual bool peek();      virtual ssize_t read(void* buffer, ssize_t buflen); -};  -  -/********************************************************/  -/********************************************************/  +}; + +/********************************************************/ +/********************************************************/  static int gcry_pthread_mutex_init(void** priv) { -    int err = 0;  -  -    try {  +    int err = 0; + +    try {          TMutex* lock = new TMutex; -        *priv = lock;  +        *priv = lock;      } catch (...) { -        err = -1;  -    }  -  -    return err;  -}  -  +        err = -1; +    } + +    return err; +} +  static int gcry_pthread_mutex_destroy(void** lock) {      delete static_cast<TMutex*>(*lock); -    return 0;  -}  -  +    return 0; +} +  static int gcry_pthread_mutex_lock(void** lock) {      static_cast<TMutex*>(*lock)->Acquire(); -  -    return 0;  -}  -  + +    return 0; +} +  static int gcry_pthread_mutex_unlock(void** lock) {      static_cast<TMutex*>(*lock)->Release(); -  -    return 0;  -}  -  -static struct gcry_thread_cbs gcry_threads_pthread =  + +    return 0; +} + +static struct gcry_thread_cbs gcry_threads_pthread =      {          GCRY_THREAD_OPTION_PTHREAD, NULL,          gcry_pthread_mutex_init, gcry_pthread_mutex_destroy,          gcry_pthread_mutex_lock, gcry_pthread_mutex_unlock,          NULL, NULL, NULL, NULL,          NULL, NULL, NULL, NULL}; -  -/********************************************************/  + +/********************************************************/  struct https_initor {      https_initor() {          gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread); -        gnutls_global_init();  +        gnutls_global_init();          InitNetworkSubSystem(); -    }  -  +    } +      ~https_initor() { -        gnutls_global_deinit();  -    }  -};  -  -static https_initor _initor;  -  -/********************************************************/  +        gnutls_global_deinit(); +    } +}; + +static https_initor _initor; + +/********************************************************/  socketSecureHandler::socketSecureHandler()      : socketRegularHandler()      , IsValid_(false)      , Session_()      , Credits_() -{  -}  -  -/********************************************************/  +{ +} + +/********************************************************/  socketSecureHandler::~socketSecureHandler() {      if (IsValid_) -        Disconnect();  -}  -  -/********************************************************/  +        Disconnect(); +} + +/********************************************************/  bool socketSecureHandler::Good() {      return Socket_.Good() && IsValid_; -}  -  -/********************************************************/  +} + +/********************************************************/  int socketSecureHandler::Connect(const TAddrList& addrs, TDuration Timeout) {      IsValid_ = false; -  +      int ret = socketRegularHandler::Connect(addrs, Timeout); -    if (ret)  -        return ret;  -  +    if (ret) +        return ret; +      gnutls_certificate_allocate_credentials(&Credits_);      gnutls_init(&Session_, GNUTLS_CLIENT);      gnutls_set_default_priority(Session_);      gnutls_credentials_set(Session_, GNUTLS_CRD_CERTIFICATE, Credits_); -  +      SOCKET fd = Socket_;      gnutls_transport_set_ptr(Session_, (gnutls_transport_ptr)fd); -  +      ret = gnutls_handshake(Session_); -  +      if (ret < 0) { -        fprintf(stderr, "*** Handshake failed\n");  -        gnutls_perror(ret);  -  +        fprintf(stderr, "*** Handshake failed\n"); +        gnutls_perror(ret); +          gnutls_deinit(Session_);          if (Credits_) {              gnutls_certificate_free_credentials(Credits_);              Credits_ = 0; -        }  -        return 1;  -    }  -  +        } +        return 1; +    } +      IsValid_ = true;      return !IsValid_; -}  -  -/********************************************************/  +} + +/********************************************************/  void socketSecureHandler::Disconnect() {      if (IsValid_) {          gnutls_bye(Session_, GNUTLS_SHUT_RDWR);          IsValid_ = false;          gnutls_deinit(Session_); -    }  -  +    } +      if (Credits_) {          gnutls_certificate_free_credentials(Credits_);          Credits_ = 0; -    }  -  -    socketRegularHandler::Disconnect();  -}  -  -/********************************************************/  +    } + +    socketRegularHandler::Disconnect(); +} + +/********************************************************/  void socketSecureHandler::shutdown() { -}  -  -/********************************************************/  +} + +/********************************************************/  bool socketSecureHandler::send(const char* message, ssize_t messlen) {      if (!IsValid_) -        return false;  +        return false;      ssize_t rv = gnutls_record_send(Session_, message, messlen); -    return rv >= 0;  -}  -  -/********************************************************/  +    return rv >= 0; +} + +/********************************************************/  bool socketSecureHandler::peek() { -    //ssize_t rv = gnutls_record_check_pending(mSession);  -    //return rv>0;  -    return true;  -}  -  -/********************************************************/  +    //ssize_t rv = gnutls_record_check_pending(mSession); +    //return rv>0; +    return true; +} + +/********************************************************/  ssize_t socketSecureHandler::read(void* buffer, ssize_t buflen) {      if (!IsValid_) -        return false;  +        return false;      return gnutls_record_recv(Session_, (char*)buffer, buflen); -}  -  -#endif  -  -/************************************************************/  +} + +#endif + +/************************************************************/  socketAbstractHandler* socketHandlerFactory::chooseHandler(const THttpURL& url) {      if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTP) -        return new socketRegularHandler;  -  +        return new socketRegularHandler; +  #ifdef USE_GNUTLS      if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTPS) -        return new socketSecureHandler;  +        return new socketSecureHandler;  #endif -  +      return nullptr; -}  -  -/************************************************************/  -socketHandlerFactory socketHandlerFactory::sInstance;  -/************************************************************/  +} + +/************************************************************/ +socketHandlerFactory socketHandlerFactory::sInstance; +/************************************************************/ diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h index 1c5b94a678e..7fc251afd2e 100644 --- a/library/cpp/http/fetch/httpfetcher.h +++ b/library/cpp/http/fetch/httpfetcher.h @@ -1,22 +1,22 @@  #pragma once -  -#ifdef _MSC_VER  + +#ifdef _MSC_VER  #include <io.h> -#endif  -  +#endif +  #include <library/cpp/http/misc/httpdate.h> -  -#include "httpagent.h"  -#include "httpparser.h"  -  -struct TFakeBackup {  + +#include "httpagent.h" +#include "httpparser.h" + +struct TFakeBackup {      int Write(void* /*buf*/, size_t /*size*/) {          return 0;      } -};  -  -template <size_t bufsize = 5000>  -struct TFakeAlloc {  +}; + +template <size_t bufsize = 5000> +struct TFakeAlloc {      void Shrink(void* /*buf*/, size_t /*size*/) {      }      void* Grab(size_t /*min*/, size_t* real) { @@ -24,17 +24,17 @@ struct TFakeAlloc {          return buf;      }      char buf[bufsize]; -};  -  +}; +  template <typename TAlloc = TFakeAlloc<>,            typename TCheck = TFakeCheck<>, -          typename TWriter = TFakeBackup,  +          typename TWriter = TFakeBackup,            typename TAgent = THttpAgent<>>  class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent { -public:  -    static const size_t TCP_MIN = 1500;  -    static int TerminateNow;  -  +public: +    static const size_t TCP_MIN = 1500; +    static int TerminateNow; +      THttpFetcher()          : THttpParser<TCheck>()          , TAlloc() @@ -47,54 +47,54 @@ public:      }      int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) { -        int ret = 0;  -        int fetcherr = 0;  -  -        THttpParser<TCheck>::Init(header, head_request);  +        int ret = 0; +        int fetcherr = 0; + +        THttpParser<TCheck>::Init(header, head_request);          const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme());          size_t schemelen = strlen(scheme); -        if (*path == '/') {  +        if (*path == '/') {              header->base = TStringBuf(scheme, schemelen);              header->base += TStringBuf("://", 3);              header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg);              header->base += path; -        } else {  +        } else {              if (strlen(path) >= FETCHER_URL_MAX) {                  header->error = HTTP_URL_TOO_LARGE;                  return 0;              }              header->base = path; -        }  -  -        if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) {  -            header->error = (i16)ret;  -            return 0;  -        }  -  -        bool inheader = 1;  +        } + +        if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) { +            header->error = (i16)ret; +            return 0; +        } + +        bool inheader = 1;          void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr; -        ssize_t got;  -        size_t buffree = 0, bufsize = 0, buflen = 0;  -        size_t maxsize = TCheck::GetMaxHeaderSize();  -        do {  -            if (buffree < TCP_MIN) {  -                if (buf) {  -                    TAlloc::Shrink(buf, buflen - buffree);  -                    if (TWriter::Write(buf, buflen - buffree) < 0) {  +        ssize_t got; +        size_t buffree = 0, bufsize = 0, buflen = 0; +        size_t maxsize = TCheck::GetMaxHeaderSize(); +        do { +            if (buffree < TCP_MIN) { +                if (buf) { +                    TAlloc::Shrink(buf, buflen - buffree); +                    if (TWriter::Write(buf, buflen - buffree) < 0) {                          buf = nullptr; -                        ret = EIO;  -                        break;  -                    }  -                }  -                if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) {  -                    ret = ENOMEM;  -                    break;  -                }  -                bufptr = buf;  -                buffree = buflen;  -            }  -            if ((got = TAgent::read(bufptr, buffree)) < 0) {  -                fetcherr = errno;  +                        ret = EIO; +                        break; +                    } +                } +                if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) { +                    ret = ENOMEM; +                    break; +                } +                bufptr = buf; +                buffree = buflen; +            } +            if ((got = TAgent::read(bufptr, buffree)) < 0) { +                fetcherr = errno;                  if (errno == EINTR)                      header->error = HTTP_INTERRUPTED;                  else if (errno == ETIMEDOUT) @@ -102,43 +102,43 @@ public:                  else                      header->error = HTTP_CONNECTION_LOST; -                break;  -            }  -  -            parsebuf = bufptr;  -            bufptr = (char*)bufptr + got;  -            bufsize += got;  -            buffree -= got;  -  -            THttpParser<TCheck>::Parse(parsebuf, got);  - -            if (header->error)  +                break; +            } + +            parsebuf = bufptr; +            bufptr = (char*)bufptr + got; +            bufsize += got; +            buffree -= got; + +            THttpParser<TCheck>::Parse(parsebuf, got); + +            if (header->error)                  break; //if ANY error ocurred we will stop download that file or will have unprognosed stream position until MAX size reached -  -            if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) {  -                inheader = 0;  -                if (TCheck::Check(header))  -                    break;  -                if (header->header_size > (long)maxsize) {  -                    header->error = HTTP_HEADER_TOO_LARGE;  -                    break;  -                }  + +            if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) { +                inheader = 0; +                if (TCheck::Check(header)) +                    break; +                if (header->header_size > (long)maxsize) { +                    header->error = HTTP_HEADER_TOO_LARGE; +                    break; +                }              }              if (!inheader) { -                maxsize = TCheck::GetMaxBodySize(header);  -            }  -            if (header->http_status >= HTTP_EXTENDED)  -                break;  -            if (bufsize > maxsize) {  -                header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE;  -                break;  -            }  -            if (TerminateNow) {  -                header->error = HTTP_INTERRUPTED;  -                break;  -            }  -        } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof);  -  +                maxsize = TCheck::GetMaxBodySize(header); +            } +            if (header->http_status >= HTTP_EXTENDED) +                break; +            if (bufsize > maxsize) { +                header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE; +                break; +            } +            if (TerminateNow) { +                header->error = HTTP_INTERRUPTED; +                break; +            } +        } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof); +          i64 Adjustment = 0;          if (!header->error) {              if (header->transfer_chunked) { @@ -150,22 +150,22 @@ public:                  Adjustment = 0;          } -        if (buf) {  +        if (buf) {              TAlloc::Shrink(buf, buflen - buffree + Adjustment); -            if (TWriter::Write(buf, buflen - buffree) < 0)  -                ret = EIO;  -        }  -        TCheck::CheckEndDoc(header);  -        if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) {  -            TAgent::Disconnect();  -            if (!fetcherr)  -                fetcherr = errno;  -        }  -        errno = fetcherr;  -        return ret;  -    }  -};  -  -template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent>  -int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0;  +            if (TWriter::Write(buf, buflen - buffree) < 0) +                ret = EIO; +        } +        TCheck::CheckEndDoc(header); +        if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) { +            TAgent::Disconnect(); +            if (!fetcherr) +                fetcherr = errno; +        } +        errno = fetcherr; +        return ret; +    } +}; + +template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent> +int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0; diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h index 62a27b65614..c4abdcd0d23 100644 --- a/library/cpp/http/fetch/httpfsm.h +++ b/library/cpp/http/fetch/httpfsm.h @@ -1,13 +1,13 @@  #pragma once -  +  #include "httpheader.h" -#include <util/system/maxlen.h>  +#include <util/system/maxlen.h>  #include <util/datetime/parser.h> -  +  #include <time.h> -struct THttpHeaderParser {  +struct THttpHeaderParser {      static constexpr int ErrFirstlineTypeMismatch = -3;      static constexpr int ErrHeader = -2;      static constexpr int Err = -1; @@ -16,34 +16,34 @@ struct THttpHeaderParser {      static constexpr int Accepted = 2;      int Execute(const void* inBuf, size_t len) { -        return execute((unsigned char*)inBuf, (int)len);  -    }  -  +        return execute((unsigned char*)inBuf, (int)len); +    } +      int Execute(TStringBuf str) {          return Execute(str.data(), str.size());      }      int Init(THttpHeader* h) {          int ret = Init((THttpBaseHeader*)(h)); -        hd = h;  -        hd->Init();  +        hd = h; +        hd->Init();          hreflangpos = hd->hreflangs;          hreflangspace = HREFLANG_MAX;          return ret; -    }  -  +    } +      int Init(THttpAuthHeader* h) { -        int ret = Init((THttpHeader*)(h));  -        auth_hd = h;  -        return ret;  -    }  +        int ret = Init((THttpHeader*)(h)); +        auth_hd = h; +        return ret; +    }      int Init(THttpRequestHeader* h) {          int ret = Init((THttpBaseHeader*)(h));          request_hd = h;          request_hd->Init();          return ret;      } -  +      THttpHeader* hd;      long I;      int Dc; @@ -51,7 +51,7 @@ struct THttpHeaderParser {      char buf[FETCHER_URL_MAX];      size_t buflen;      char* lastchar; -  +      const unsigned char* langstart;      size_t langlen; @@ -62,10 +62,10 @@ struct THttpHeaderParser {      THttpAuthHeader* auth_hd;      THttpRequestHeader* request_hd; -  -private:  + +private:      THttpBaseHeader* base_hd; -    int cs;  +    int cs;  private:      int Init(THttpBaseHeader* header) { @@ -78,27 +78,27 @@ private:      }      int execute(unsigned char* inBuf, int len); -    void init();  -};  -  -struct THttpChunkParser {  +    void init(); +}; + +struct THttpChunkParser {      int Execute(const void* inBuf, int len) { -        return execute((unsigned char*)inBuf, len);  -    }  -  -    int Init() {  -        init();  -        return 0;  -    }  -  +        return execute((unsigned char*)inBuf, len); +    } + +    int Init() { +        init(); +        return 0; +    } +      int chunk_length;      char* lastchar;      long I;      int Dc;      i64 cnt64; -  -private:  -    int cs;  + +private: +    int cs;      int execute(unsigned char* inBuf, int len); -    void init();  -};  +    void init(); +}; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index 83557b144e5..eab0328b187 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -1,70 +1,70 @@ -#include <stdio.h>  -#include <time.h>  -  +#include <stdio.h> +#include <time.h> +  #include <library/cpp/charset/doccodes.h>  #include <library/cpp/charset/codepage.h>  #include <library/cpp/http/misc/httpcodes.h>  #include <util/datetime/base.h>  #include <util/generic/ylimits.h>  #include <algorithm>    // max -  +  #include <library/cpp/http/fetch/httpheader.h>  #include <library/cpp/http/fetch/httpfsm.h> -  +  #ifdef _MSC_VER  #pragma warning(disable: 4702)  // unreachable code  #endif  #define c(i) I = i;  #define m(i) I = std::max(I, (long)i); -  -static inline int X(unsigned char c) {  -    return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0'));  -}  -  -template <typename x>  -static inline void guard(x &val) {  -    val = (val >= -1) ? -4 - val : -2; // f(-2) = -2  -}  -  -template <typename x>  -static inline void setguarded(x &val, long cnt) {  -    val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2;  -}  -  -////////////////////////////////////////////////////////////////////  -/// HTTP PARSER  -////////////////////////////////////////////////////////////////////  -  -%%{  -machine http_header_parser;  -  + +static inline int X(unsigned char c) { +    return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); +} + +template <typename x> +static inline void guard(x &val) { +    val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 +} + +template <typename x> +static inline void setguarded(x &val, long cnt) { +    val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; +} + +//////////////////////////////////////////////////////////////////// +/// HTTP PARSER +//////////////////////////////////////////////////////////////////// + +%%{ +machine http_header_parser; +  include HttpDateTimeParser "../../../../util/datetime/parser.rl6"; -alphtype unsigned char;  -  -################# 2.2 Basic Rules #################  -eol             = '\r'? '\n';  -ws              = [ \t];  -lw              = '\r'? '\n'? ws;  +alphtype unsigned char; + +################# 2.2 Basic Rules ################# +eol             = '\r'? '\n'; +ws              = [ \t]; +lw              = '\r'? '\n'? ws;  separator       = [()<>@,;:\\"/\[\]?={}]; -token_char      = [!-~] - separator;          # http tokens chars  -url_char        = [!-~] - ["<>\[\]\\^`{}|];   # uric chars  -text_char       = ws | 33..126 | 128..255;  -any_text_char   = any - [\r\n];  -  -lws             = lw*;  -eoh             = lws eol;  -token           = token_char+;  -ex_token        = (token_char | ws)* token_char;  -text            = (text_char | lw)*;  -any_text        = (any_text_char | lw)*;  -def             = lws ':' lws;  -  +token_char      = [!-~] - separator;          # http tokens chars +url_char        = [!-~] - ["<>\[\]\\^`{}|];   # uric chars +text_char       = ws | 33..126 | 128..255; +any_text_char   = any - [\r\n]; + +lws             = lw*; +eoh             = lws eol; +token           = token_char+; +ex_token        = (token_char | ws)* token_char; +text            = (text_char | lw)*; +any_text        = (any_text_char | lw)*; +def             = lws ':' lws; +  action clear_buf   { buflen = 0; }  action update_buf  { if (buflen < sizeof(buf)) buf[buflen++] = fc; } -  -###################################################  + +###################################################  ############ response status line #################  action set_minor  { base_hd->http_minor = I; }  action set_status { @@ -75,14 +75,14 @@ action set_status {          return -3;      }  } -  +  status_code          = int3;  http_major           = int;  http_minor           = int;  reason_phrase        = ws+ text_char*;  http_version         = "http/"i http_major '.' http_minor %set_minor;  response_status_line = http_version ws+ status_code reason_phrase? eol %set_status; -  +  ############ request status line #################  action set_request_uri {      if (request_hd && buflen < FETCHER_URL_MAX) { @@ -116,44 +116,44 @@ request_uri          = (token_char | separator)+ >clear_buf $update_buf                          %set_request_uri;  request_status_line   = http_method ws+ request_uri ws+ http_version eoh; -################# connection ######################  +################# connection ######################  action beg_connection { guard(base_hd->connection_closed); I = -1; }  action set_connection { setguarded(base_hd->connection_closed, I); } -  -c_token         = "close"i      %{m(1)}  -                | "keep-alive"i %{m(0)};  -c_tokenlist     = c_token (lws ',' lws c_token)?;  + +c_token         = "close"i      %{m(1)} +                | "keep-alive"i %{m(0)}; +c_tokenlist     = c_token (lws ',' lws c_token)?;  connection      = "connection"i def %beg_connection c_tokenlist eoh %set_connection; -  -################# content-encoding ################  + +################# content-encoding ################  action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; }  action set_content_encoding { base_hd->compression_method =                               ((base_hd->compression_method == HTTP_COMPRESSION_UNSET ||                                 base_hd->compression_method == I) ?                                   I : (int)HTTP_COMPRESSION_ERROR); } -  -ce_tokenlist      = "identity"i   %{c(HTTP_COMPRESSION_IDENTITY)}  -                  | "gzip"i       %{c(HTTP_COMPRESSION_GZIP)}  -                  | "x-gzip"i     %{c(HTTP_COMPRESSION_GZIP)}  -                  | "deflate"i    %{c(HTTP_COMPRESSION_DEFLATE)}  -                  | "compress"i   %{c(HTTP_COMPRESSION_COMPRESS)}  -                  | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};  + +ce_tokenlist      = "identity"i   %{c(HTTP_COMPRESSION_IDENTITY)} +                  | "gzip"i       %{c(HTTP_COMPRESSION_GZIP)} +                  | "x-gzip"i     %{c(HTTP_COMPRESSION_GZIP)} +                  | "deflate"i    %{c(HTTP_COMPRESSION_DEFLATE)} +                  | "compress"i   %{c(HTTP_COMPRESSION_COMPRESS)} +                  | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};  content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding; -  -################# transfer-encoding ###############  + +################# transfer-encoding ###############  action beg_encoding { guard(base_hd->transfer_chunked); }  action set_encoding { setguarded(base_hd->transfer_chunked, I); } -  -e_tokenlist       = "identity"i %{c(0)}  -                  | "chunked"i  %{c(1)};  + +e_tokenlist       = "identity"i %{c(0)} +                  | "chunked"i  %{c(1)};  transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding; -  -################# content-length ##################  + +################# content-length ##################  action beg_content_length { guard(base_hd->content_length); }  action set_content_length { setguarded(base_hd->content_length, I); } -  +  content_length  = "content-length"i def %beg_content_length int eoh %set_content_length; -  +  ################# content-range ###################  action beg_content_range_start { guard(base_hd->content_range_start); I = -1; }  action set_content_range_start { setguarded(base_hd->content_range_start, I); } @@ -166,7 +166,7 @@ content_range   = "content-range"i def "bytes"i sp %beg_content_range_start int                                                     %beg_content_range_end int '/' %set_content_range_end                                                     %beg_content_range_el int eoh %set_content_range_el; -################# accept-ranges ###################  +################# accept-ranges ###################  action beg_accept_ranges {      if (hd) {          guard(hd->accept_ranges); @@ -174,21 +174,21 @@ action beg_accept_ranges {      }  }  action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); } -  -ar_tokenlist    = "bytes"i %{c(1)}  -                | "none"i  %{c(0)};  + +ar_tokenlist    = "bytes"i %{c(1)} +                | "none"i  %{c(0)};  accept_ranges   = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges; -  -################# content-type ####################  + +################# content-type ####################  action beg_mime { guard(base_hd->mime_type); }  action set_mime { setguarded(base_hd->mime_type, I); }  action set_charset {      if (buflen < FETCHER_URL_MAX) { -        buf[buflen++] = 0;  +        buf[buflen++] = 0;          base_hd->charset = EncodingHintByName((const char*)buf); -    }  -}  -  +    } +} +  mime_type       = "text/plain"i                     %{c(MIME_TEXT)}                  | "text/html"i                      %{c(MIME_HTML)}                  | "application/pdf"i                %{c(MIME_PDF)} @@ -234,36 +234,36 @@ mime_type       = "text/plain"i                     %{c(MIME_TEXT)}  charset_name    = token_char+ >clear_buf $update_buf;  mime_param      = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset   @2 -                | token ws* '=' ws* '"'? token '"'?                           @1  -                | text                                                        $0;  -mime_parms      = (lws ';' lws mime_param)*;  +                | token ws* '=' ws* '"'? token '"'?                           @1 +                | text                                                        $0; +mime_parms      = (lws ';' lws mime_param)*;  content_type    = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime; -  -################# last modified ###################  + +################# last modified ###################  action beg_modtime { guard(base_hd->http_time); }  action set_modtime {      setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1));  } -  +  last_modified   = "last-modified"i def %beg_modtime http_date eoh %set_modtime; -  -################# location ########################  + +################# location ########################  action set_location {      while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) {          buflen --;      }      if (hd && buflen < FETCHER_URL_MAX) {          hd->location = TStringBuf(buf, buflen); -    }  -}  -  +    } +} +  action set_status_303{ if (hd) hd->http_status = 303; } -  +  url             = url_char+ >clear_buf $update_buf;  loc_url         = any_text_char+ >clear_buf $update_buf;  location        = "location"i def loc_url eoh %set_location;  refresh         = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; -  +  ################# x-robots-tag ################  action set_x_robots {      if (hd && AcceptingXRobots) { @@ -349,56 +349,56 @@ action set_squid_error {  squid_error     = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; -################# auth ########################  +################# auth ########################  action init_auth { -    if (auth_hd)  -        auth_hd->use_auth=true;  -}  -  +    if (auth_hd) +        auth_hd->use_auth=true; +} +  action update_auth_buf -    { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; }  -  -quoted_str = /"/ (text_char - /"/)* /"/ >2;  +    { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } + +quoted_str = /"/ (text_char - /"/)* /"/ >2;  auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2; -  -#  do not support auth-int, too heavy procedure  -  -qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; };  -  -qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 );  -  -auth_good_param = ( "nonce"i /=/ auth_quoted_str )  + +#  do not support auth-int, too heavy procedure + +qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; + +qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); + +auth_good_param = ( "nonce"i /=/ auth_quoted_str )                      %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { -                        buf[buflen++] = 0;  -                        auth_hd->nonce = strdup((const char*)buf);  -                     }}  -                | ( "realm"i /=/ auth_quoted_str )  +                        buf[buflen++] = 0; +                        auth_hd->nonce = strdup((const char*)buf); +                     }} +                | ( "realm"i /=/ auth_quoted_str )                      %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { -                        buf[buflen++] = 0;  -                        auth_hd->realm = strdup((const char*)buf);  -                     }}  -                | ( "opaque"i /=/ auth_quoted_str )  +                        buf[buflen++] = 0; +                        auth_hd->realm = strdup((const char*)buf); +                     }} +                | ( "opaque"i /=/ auth_quoted_str )                       %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { -                         buf[buflen++] = 0;  -                         auth_hd->opaque = strdup((const char*)buf);  -                      }}  -                | "stale"i /=/ "true"i  -                     %{if (auth_hd) auth_hd->stale = true; }  -                | "algorithm"i /=/ "md5"i /-/ "sess"i  -                   %{if (auth_hd) auth_hd->algorithm = 1; }  -                | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/);  -  -auth_param = auth_good_param @1 |  -           ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )  -              /=/ (token | quoted_str ) ) $0;  -  -auth_params = auth_param ( ws* /,/ ws* auth_param )*;  -  +                         buf[buflen++] = 0; +                         auth_hd->opaque = strdup((const char*)buf); +                      }} +                | "stale"i /=/ "true"i +                     %{if (auth_hd) auth_hd->stale = true; } +                | "algorithm"i /=/ "md5"i /-/ "sess"i +                   %{if (auth_hd) auth_hd->algorithm = 1; } +                | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); + +auth_param = auth_good_param @1 | +           ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) +              /=/ (token | quoted_str ) ) $0; + +auth_params = auth_param ( ws* /,/ ws* auth_param )*; +  digest_challenge =  ("digest"i %init_auth ws+ auth_params) | -                    ((token-"digest"i) text);  -  -auth  = "www-authenticate"i def digest_challenge eoh;  -  +                    ((token-"digest"i) text); + +auth  = "www-authenticate"i def digest_challenge eoh; +  ###################### host #######################  action set_host {      if (request_hd && buflen < HOST_MAX) { @@ -562,8 +562,8 @@ action set_request_priority {  request_priority = "x-yandex-request-priority"i def int eoh                     %set_request_priority; -################# message header ##################  -other_header    = ( ex_token - "www-authenticate"i ) def any_text eoh;  +################# message header ################## +other_header    = ( ex_token - "www-authenticate"i ) def any_text eoh;  message_header  = other_header          $0                  | connection            @1                  | content_encoding      @1 @@ -595,90 +595,90 @@ request_header  = message_header        $0                  | request_cache_control @1                  | response_timeout      @1                  | request_priority      @1; -  -################# main ############################  + +################# main ############################  action accepted { lastchar = (char*)fpc; return 2; } -  +  main            := ((response_status_line ('\r'? response_header)*)                   | (request_status_line ('\r' ? request_header)*))                   eol @accepted; -  -}%%  -  -%% write data;  -  -int THttpHeaderParser::execute(unsigned char *inBuf, int len) {  -    const unsigned char *p = inBuf;  -    const unsigned char *pe = p + len;  -    %% write exec;  -    if (cs == http_header_parser_error)  -        return -1;  -    else if (cs == http_header_parser_first_final)  -        return 0;  -    else  -        return 1;  -}  -  -void THttpHeaderParser::init() {  -    %% write init;  -}  -  -%%{  -machine http_chunk_parser;  -  -alphtype unsigned char;  -  + +}%% + +%% write data; + +int THttpHeaderParser::execute(unsigned char *inBuf, int len) { +    const unsigned char *p = inBuf; +    const unsigned char *pe = p + len; +    %% write exec; +    if (cs == http_header_parser_error) +        return -1; +    else if (cs == http_header_parser_first_final) +        return 0; +    else +        return 1; +} + +void THttpHeaderParser::init() { +    %% write init; +} + +%%{ +machine http_chunk_parser; + +alphtype unsigned char; +  action clear_hex   { cnt64 = 0; }  action update_hex   { cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; }  action set_chunk { chunk_length = static_cast<int>(cnt64); }  action accepted { lastchar = (char*)fpc; return 2; } -  -eol             = '\r'? '\n';  -ws              = [ \t];  -sp              = ' ';  -lw              = '\r'? '\n'? ws;  -separator       = [()<>@,;:\\"/\[\]?={}];  -token_char      = [!-~] - separator;          # http tokens chars  -url_char        = [!-~] - ["<>\[\]\\^`{}|];   # uric chars  -text_char       = ws | 33..127 | 160..255;  -  -lws             = lw*;  -eoh             = lws eol;  -token           = token_char+;  -text            = (text_char | lw)*;  -def             = lws ':' lws;  -  + +eol             = '\r'? '\n'; +ws              = [ \t]; +sp              = ' '; +lw              = '\r'? '\n'? ws; +separator       = [()<>@,;:\\"/\[\]?={}]; +token_char      = [!-~] - separator;          # http tokens chars +url_char        = [!-~] - ["<>\[\]\\^`{}|];   # uric chars +text_char       = ws | 33..127 | 160..255; + +lws             = lw*; +eoh             = lws eol; +token           = token_char+; +text            = (text_char | lw)*; +def             = lws ':' lws; +  hex             = (xdigit+) >clear_hex $update_hex; -quoted_string   = '"' ((text_char - '"') $0 | '\\"' @1)* '"';  -  -chunk_ext_val   = token | quoted_string;  -chunk_ext_name  = token;  -chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;  -  -entity_header   = token def text eoh;  -trailer         = entity_header*;  -  +quoted_string   = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; + +chunk_ext_val   = token | quoted_string; +chunk_ext_name  = token; +chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; + +entity_header   = token def text eoh; +trailer         = entity_header*; +  chunk           = (hex - '0'+) chunk_extension? %set_chunk; -last_chunk      = '0'+ chunk_extension? eol trailer;  +last_chunk      = '0'+ chunk_extension? eol trailer;  main           := eol (chunk $0 | last_chunk @1) eol @accepted; -  -}%%  -  -%% write data;  -  -int THttpChunkParser::execute(unsigned char *inBuf, int len) {  -    const unsigned char *p = inBuf;  -    const unsigned char *pe = p + len;  -    %% write exec;  -    if (cs == http_chunk_parser_error)  -        return -1;  -    else if (cs == http_chunk_parser_first_final)  -        return 0;  -    else  -        return 1;  -}  -  -void THttpChunkParser::init() {  -    chunk_length = 0;  -    %% write init;  -}  + +}%% + +%% write data; + +int THttpChunkParser::execute(unsigned char *inBuf, int len) { +    const unsigned char *p = inBuf; +    const unsigned char *pe = p + len; +    %% write exec; +    if (cs == http_chunk_parser_error) +        return -1; +    else if (cs == http_chunk_parser_first_final) +        return 0; +    else +        return 1; +} + +void THttpChunkParser::init() { +    chunk_length = 0; +    %% write init; +} diff --git a/library/cpp/http/fetch/httpheader.h b/library/cpp/http/fetch/httpheader.h index 20f8e0956b1..b2810bbd413 100644 --- a/library/cpp/http/fetch/httpheader.h +++ b/library/cpp/http/fetch/httpheader.h @@ -1,5 +1,5 @@  #pragma once -  +  #include "exthttpcodes.h"  #include <library/cpp/mime/types/mime.h> @@ -11,11 +11,11 @@  #include <util/system/maxlen.h>  #include <ctime> -#include <cstdio>  -#include <cstdlib>  -#include <cstring>  +#include <cstdio> +#include <cstdlib> +#include <cstring>  #include <algorithm> -  +  // This is ugly solution but here a lot of work to do it the right way.  #define FETCHER_URL_MAX 8192 @@ -29,16 +29,16 @@ extern const i32 DEFAULT_RESPONSE_TIMEOUT;  /// == -1  #define MAX_LANGREGION_LEN 4  #define MAXWORD_LEN 55 -enum HTTP_COMPRESSION {  +enum HTTP_COMPRESSION {      HTTP_COMPRESSION_UNSET = 0,      HTTP_COMPRESSION_ERROR = 1, -    HTTP_COMPRESSION_IDENTITY = 2,  +    HTTP_COMPRESSION_IDENTITY = 2,      HTTP_COMPRESSION_GZIP = 3,      HTTP_COMPRESSION_DEFLATE = 4, -    HTTP_COMPRESSION_COMPRESS = 5,  +    HTTP_COMPRESSION_COMPRESS = 5,      HTTP_COMPRESSION_MAX = 6 -};  -  +}; +  enum HTTP_METHOD {      HTTP_METHOD_UNDEFINED = -1,      HTTP_METHOD_OPTIONS, @@ -78,25 +78,25 @@ public:      TString base;  public: -    void Init() {  +    void Init() {          error = 0; -        header_size = 0;  -        entity_size = 0;  -        content_length = -1;  -        http_time = -1;  -        http_minor = -1;  -        mime_type = -1;  -        charset = -1;  -        compression_method = HTTP_COMPRESSION_UNSET;  -        transfer_chunked = -1;  +        header_size = 0; +        entity_size = 0; +        content_length = -1; +        http_time = -1; +        http_minor = -1; +        mime_type = -1; +        charset = -1; +        compression_method = HTTP_COMPRESSION_UNSET; +        transfer_chunked = -1;          connection_closed = HTTP_CONNECTION_UNDEFINED;          content_range_start = -1;          content_range_end = -1;          content_range_entity_length = -1;          base.clear(); -    }  -  -    void Print() const {  +    } + +    void Print() const {          printf("content_length: %" PRIi64 "\n", content_length);          printf("http_time: %" PRIi64 "\n", http_time);          printf("http_minor: %" PRIi8 "\n", http_minor); @@ -110,22 +110,22 @@ public:          printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length);          printf("base: \"%s\"\n", base.c_str());          printf("error: %" PRIi16 "\n", error); -    }  -  -    int SetBase(const char* path,  +    } + +    int SetBase(const char* path,                  const char* hostNamePtr = nullptr,                  int hostNameLength = 0) {          if (*path == '/') {              base = "http://";              base += TStringBuf(hostNamePtr, hostNameLength);              base += path; -        } else {  +        } else {              base = path; -        }  -        return error;  -    }  -};  -  +        } +        return error; +    } +}; +  enum { HREFLANG_MAX = FETCHER_URL_MAX * 2 };  /// Class represents Http Response Header.  struct THttpHeader: public THttpBaseHeader { @@ -165,7 +165,7 @@ public:  };  struct THttpRequestHeader: public THttpBaseHeader { -public:  +public:      TString request_uri;      char host[HOST_MAX];      char from[MAXWORD_LEN]; @@ -184,7 +184,7 @@ public:      THttpRequestHeader() {          Init();      } -  +      void Init() {          request_uri.clear();          host[0] = 0; @@ -201,7 +201,7 @@ public:          if_modified_since = DEFAULT_IF_MODIFIED_SINCE;          THttpBaseHeader::Init();      } -  +      void Print() const {          THttpBaseHeader::Print();          printf("request_uri: \"%s\"\n", request_uri.c_str()); @@ -213,7 +213,7 @@ public:          printf("max_age: %" PRIi32 "\n", max_age);          printf("if_modified_since: %" PRIi64 "\n", if_modified_since);      } -  +      /// It doesn't care about errors in request or headers, where      /// request_uri equals to '*'.      /// This returns copy of the string, which you have to delete. @@ -225,20 +225,20 @@ public:              url = HTTP_PREFIX;              url += host;              url += request_uri; -        }  +        }          return url;      } -  +      char* GetUrl(char* buffer, size_t size) {          if (host[0] == 0 || !strcmp(host, "")) {              strlcpy(buffer, request_uri.c_str(), size);          } else {              snprintf(buffer, size, "http://%s%s", host, request_uri.c_str()); -        }  +        }          return buffer;      }  }; -  +  class THttpAuthHeader: public THttpHeader {  public:      char* realm; @@ -282,6 +282,6 @@ public:              printf("stale: %d\n", stale);              printf("algorithm: %d\n", algorithm);              printf("qop_auth: %d\n", qop_auth); -        }  +        }      } -};  +}; diff --git a/library/cpp/http/fetch/httpload.cpp b/library/cpp/http/fetch/httpload.cpp index f944d7906a3..82ea8900b5c 100644 --- a/library/cpp/http/fetch/httpload.cpp +++ b/library/cpp/http/fetch/httpload.cpp @@ -1,7 +1,7 @@ -#include "httpload.h"  -  -/************************************************************/  -/************************************************************/  +#include "httpload.h" + +/************************************************************/ +/************************************************************/  httpAgentReader::httpAgentReader(httpSpecialAgent& agent,                                   const char* baseUrl,                                   bool assumeConnectionClosed, @@ -13,109 +13,109 @@ httpAgentReader::httpAgentReader(httpSpecialAgent& agent,      , BufPtr_(Buffer_)      , BufSize_(bufSize)      , BufRest_(0) -{  -    HeadRequest = false;  +{ +    HeadRequest = false;      Header = &Header_; -    if (use_auth)  +    if (use_auth)          HeaderParser.Init(&Header_); -    else  -        HeaderParser.Init(Header);  +    else +        HeaderParser.Init(Header);      setAssumeConnectionClosed(assumeConnectionClosed ? 1 : 0);      Header_.SetBase(baseUrl); -  +      if (Header_.error) -        State = hp_error;  -    else  -        State = hp_in_header;  -}  -  -/************************************************************/  +        State = hp_error; +    else +        State = hp_in_header; +} + +/************************************************************/  httpAgentReader::~httpAgentReader() {      delete[] Buffer_; -}  -  -/************************************************************/  +} + +/************************************************************/  void httpAgentReader::readBuf() {      assert(BufRest_ == 0);      if (!BufPtr_) {          BufRest_ = -1; -        return;  -    }  -  +        return; +    } +      BufRest_ = Agent_.read(Buffer_, BufSize_);      if (BufRest_ <= 0) {          BufRest_ = -1;          BufPtr_ = nullptr; -    } else {  +    } else {          BufPtr_ = Buffer_; -        //cout << "BUF: " << mBuffer << endl << endl;  -    }  -}  -  -/************************************************************/  +        //cout << "BUF: " << mBuffer << endl << endl; +    } +} + +/************************************************************/  const THttpHeader* httpAgentReader::readHeader() {      while (State == hp_in_header) {          if (!step()) {              Header_.error = HTTP_CONNECTION_LOST;              return nullptr; -        }  +        }          ParseGeneric(BufPtr_, BufRest_); -    }  +    }      if (State == hp_eof || State == hp_error) {          BufPtr_ = nullptr;          BufRest_ = -1; -    }  +    }      if (State == hp_error || Header_.error)          return nullptr;      return &Header_; -}  -  -/************************************************************/  +} + +/************************************************************/  long httpAgentReader::readPortion(void*& buf) {      assert(State != hp_in_header); -  -    long Chunk = 0;  + +    long Chunk = 0;      do {          if (BufSize_ == 0 && !BufPtr_) -            return 0;  -  -        if (!step())  -            return 0;  -  +            return 0; + +        if (!step()) +            return 0; +          Chunk = ParseGeneric(BufPtr_, BufRest_);          buf = BufPtr_; -  +          if (State == hp_error && Header_.entity_size > Header_.content_length) {              Chunk -= (Header_.entity_size - Header_.content_length);              BufPtr_ = (char*)BufPtr_ + Chunk;              BufRest_ = 0;              State = hp_eof;              Header_.error = 0; -            break;  -        }  -  +            break; +        } +          BufPtr_ = (char*)BufPtr_ + Chunk;          BufRest_ -= Chunk; -  +          if (State == hp_eof || State == hp_error) {              BufRest_ = -1;              BufPtr_ = nullptr; -        }  -    } while (!Chunk);  -    return Chunk;  -}  -  -/************************************************************/  +        } +    } while (!Chunk); +    return Chunk; +} + +/************************************************************/  bool httpAgentReader::skipTheRest() {      void* b; -    while (!eof())  -        readPortion(b);  +    while (!eof()) +        readPortion(b);      return (State == hp_eof); -}  -  -/************************************************************/  -/************************************************************/  +} + +/************************************************************/ +/************************************************************/  httpLoadAgent::httpLoadAgent(bool handleAuthorization,                               socketHandlerFactory& factory)      : Factory_(factory) @@ -126,82 +126,82 @@ httpLoadAgent::httpLoadAgent(bool handleAuthorization,      , Headers_()      , ErrCode_(0)      , RealHost_(nullptr) -{  -}  -  -/************************************************************/  +{ +} + +/************************************************************/  httpLoadAgent::~httpLoadAgent() {      delete Reader_;      free(RealHost_); -}  -  -/************************************************************/  +} + +/************************************************************/  void httpLoadAgent::clearReader() {      if (Reader_) { -        bool opened = false;  +        bool opened = false;          if (PersistentConn_) {              const THttpHeader* H = Reader_->readHeader();              if (H && !H->connection_closed) {                  Reader_->skipTheRest(); -                opened = true;  -            }  -        }  -        if (!opened)  -            Disconnect();  +                opened = true; +            } +        } +        if (!opened) +            Disconnect();          delete Reader_;          Reader_ = nullptr; -    }  +    }      ErrCode_ = 0; -}  -/************************************************************/  +} +/************************************************************/  void httpLoadAgent::setRealHost(const char* hostname) {      free(RealHost_); -    if (hostname)  +    if (hostname)          RealHost_ = strdup(hostname); -    else  +    else          RealHost_ = nullptr;      ErrCode_ = 0; -}  -  -/************************************************************/  +} + +/************************************************************/  void httpLoadAgent::setIMS(const char* ifModifiedSince) { -    char ims_buf[100];  -    snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n",  -             ifModifiedSince);  +    char ims_buf[100]; +    snprintf(ims_buf, 100, "If-Modified-Since: %s\r\n", +             ifModifiedSince);      Headers_.push_back(ims_buf); -}  -  -/************************************************************/  +} + +/************************************************************/  void httpLoadAgent::addHeaderInstruction(const char* instr) {      Headers_.push_back(instr); -}  -  -/************************************************************/  +} + +/************************************************************/  void httpLoadAgent::dropHeaderInstructions() {      Headers_.clear(); -}  -  -/************************************************************/  +} + +/************************************************************/  bool httpLoadAgent::startRequest(const THttpURL& url,                                   bool persistent,                                   const TAddrList& addrs) -{  -    clearReader();  +{ +    clearReader();      ErrCode_ = 0; -  +      URL_.Clear();      URL_ = url;      PersistentConn_ = persistent;      if (!URL_.IsValidAbs()) -        return false;  +        return false;      if (!HandleAuthorization_ && !URL_.IsNull(THttpURL::FlagAuth)) -        return false;  -  +        return false; +      return doSetHost(addrs) && doStartRequest(); -}  -  -/************************************************************/  +} + +/************************************************************/  bool httpLoadAgent::startRequest(const char* url,                                   const char* url_to_merge,                                   bool persistent, @@ -226,148 +226,148 @@ bool httpLoadAgent::startRequest(const char* url,                                   const char* url_to_merge,                                   bool persistent,                                   ui32 ip) { -    clearReader();  -  +    clearReader(); +      URL_.Clear();      PersistentConn_ = persistent; -  -    long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;  + +    long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;      if (HandleAuthorization_) -        flags |= THttpURL::FeatureAuthSupported;  -  +        flags |= THttpURL::FeatureAuthSupported; +      if (URL_.Parse(url, flags, url_to_merge) || !URL_.IsValidGlobal()) -        return false;  -  +        return false; +      return doSetHost(TAddrList::MakeV4Addr(ip, URL_.GetPort())) && doStartRequest(); -}  -  -/************************************************************/  +} + +/************************************************************/  bool httpLoadAgent::doSetHost(const TAddrList& addrs) {      socketAbstractHandler* h = Factory_.chooseHandler(URL_); -    if (!h)  -        return false;  -    Socket.setHandler(h);  -  +    if (!h) +        return false; +    Socket.setHandler(h); +      if (addrs.size()) {          ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost),                             URL_.GetPort(), addrs); -    } else {  +    } else {          ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost),                             URL_.GetPort()); -    }  +    }      if (ErrCode_) -        return false;  -  +        return false; +      if (RealHost_) { -        free(Hostheader);  +        free(Hostheader);          Hostheader = (char*)malloc(strlen(RealHost_) + 20);          sprintf(Hostheader, "Host: %s\r\n", RealHost_); -    }  -  +    } +      if (!URL_.IsNull(THttpURL::FlagAuth)) {          if (!HandleAuthorization_) {              ErrCode_ = HTTP_UNAUTHORIZED; -            return false;  -        }  -  +            return false; +        } +          Digest_.setAuthorization(URL_.Get(THttpURL::FieldUsername),                                   URL_.Get(THttpURL::FieldPassword)); -    }  -  -    return true;  -}  -  -/************************************************************/  +    } + +    return true; +} + +/************************************************************/  bool httpLoadAgent::setHost(const char* host_url,                              const TAddrList& addrs) { -    clearReader();  -  +    clearReader(); +      URL_.Clear();      PersistentConn_ = true; -  -    long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;  + +    long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet;      if (HandleAuthorization_) -        flags |= THttpURL::FeatureAuthSupported;  -  +        flags |= THttpURL::FeatureAuthSupported; +      if (URL_.Parse(host_url, flags) || !URL_.IsValidGlobal()) -        return false;  -  +        return false; +      return doSetHost(addrs); -}  -  -/************************************************************/  +} + +/************************************************************/  bool httpLoadAgent::startOneRequest(const char* local_url) { -    clearReader();  -  -    THttpURL lURL;  +    clearReader(); + +    THttpURL lURL;      if (lURL.Parse(local_url, THttpURL::FeaturesNormalizeSet) || lURL.IsValidGlobal()) -        return false;  -  +        return false; +      URL_.SetInMemory(THttpURL::FieldPath, lURL.Get(THttpURL::FieldPath));      URL_.SetInMemory(THttpURL::FieldQuery, lURL.Get(THttpURL::FieldQuery));      URL_.Rewrite(); -  -    return doStartRequest();  -}  -  -/************************************************************/  + +    return doStartRequest(); +} + +/************************************************************/  bool httpLoadAgent::doStartRequest() {      TString urlStr = URL_.PrintS(THttpURL::FlagPath | THttpURL::FlagQuery); -    if (!urlStr)  -        urlStr = "/";  -  +    if (!urlStr) +        urlStr = "/"; +      for (int step = 0; step < 10; step++) {          const char* digestHeader = Digest_.getHeaderInstruction(); -  +          unsigned i = (digestHeader) ? 2 : 1; -        const char** headers =  +        const char** headers =              (const char**)(alloca((i + Headers_.size()) * sizeof(char*))); -  +          for (i = 0; i < Headers_.size(); i++)              headers[i] = Headers_[i].c_str(); -        if (digestHeader)  -            headers[i++] = digestHeader;  +        if (digestHeader) +            headers[i++] = digestHeader;          headers[i] = nullptr; -  +          ErrCode_ = RequestGet(urlStr.c_str(), headers, PersistentConn_); -  +          if (ErrCode_) { -            Disconnect();  -            return false;  -        }  -  +            Disconnect(); +            return false; +        } +          TString urlBaseStr = URL_.PrintS(THttpURL::FlagNoFrag); -  -        clearReader();  + +        clearReader();          Reader_ = new httpAgentReader(*this, urlBaseStr.c_str(),                                        !PersistentConn_, !Digest_.empty()); -  +          if (Reader_->readHeader()) { -            //mReader->getHeader()->Print();  +            //mReader->getHeader()->Print();              if (getHeader()->http_status == HTTP_UNAUTHORIZED &&                  step < 1 &&                  Digest_.processHeader(getAuthHeader(),                                        urlStr.c_str(),                                        "GET")) { -                //mReader->skipTheRest();  +                //mReader->skipTheRest();                  delete Reader_;                  Reader_ = nullptr;                  ErrCode_ = 0; -                Disconnect();  -                continue;  -            }  -  -            return true;  -        }  -        Disconnect();  -        clearReader();  -  -        return false;  -    }  -  +                Disconnect(); +                continue; +            } + +            return true; +        } +        Disconnect(); +        clearReader(); + +        return false; +    } +      ErrCode_ = HTTP_UNAUTHORIZED; -    return false;  -}  -  -/************************************************************/  -/************************************************************/  +    return false; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/httpload.h b/library/cpp/http/fetch/httpload.h index 1441dd27b5e..e22e4b809ea 100644 --- a/library/cpp/http/fetch/httpload.h +++ b/library/cpp/http/fetch/httpload.h @@ -1,226 +1,226 @@  #pragma once -  +  #include "httpagent.h"  #include "httpparser.h"  #include "http_digest.h" -#include <util/system/compat.h>  -#include <util/string/vector.h>  +#include <util/system/compat.h> +#include <util/string/vector.h>  #include <util/network/ip.h>  #include <library/cpp/uri/http_url.h>  #include <library/cpp/http/misc/httpcodes.h> -  -/********************************************************/  -// Section 1: socket handlers  -/********************************************************/  -// The following classes allows to adopt template scheme  -// THttpAgent for work with socket by flexible  -// object-style scheme.  -  -/********************************************************/  -// This class is used as a base one for flexible  -// socket handling  + +/********************************************************/ +// Section 1: socket handlers +/********************************************************/ +// The following classes allows to adopt template scheme +// THttpAgent for work with socket by flexible +// object-style scheme. + +/********************************************************/ +// This class is used as a base one for flexible +// socket handling  class socketAbstractHandler { -public:  -    virtual bool Good() = 0;  -  +public: +    virtual bool Good() = 0; +      virtual int Connect(const TAddrList& addrs, TDuration Timeout) = 0; -  -    virtual void Disconnect() = 0;  -  -    virtual void shutdown() = 0;  -  -    virtual bool send(const char* message, ssize_t messlen) = 0;  -  -    virtual bool peek() = 0;  -  -    virtual ssize_t read(void* buffer, ssize_t buflen) = 0;  -  + +    virtual void Disconnect() = 0; + +    virtual void shutdown() = 0; + +    virtual bool send(const char* message, ssize_t messlen) = 0; + +    virtual bool peek() = 0; + +    virtual ssize_t read(void* buffer, ssize_t buflen) = 0; +      virtual ~socketAbstractHandler() {      } -  -protected:  + +protected:      socketAbstractHandler() {      } -};  -  -/********************************************************/  -// This class is used as a proxy between THttpAgent and  -// socketAbstractHandler  -// (it is used by template scheme,  -//  so it does not have virtual methods)  +}; + +/********************************************************/ +// This class is used as a proxy between THttpAgent and +// socketAbstractHandler +// (it is used by template scheme, +//  so it does not have virtual methods)  class TSocketHandlerPtr { -protected:  +protected:      socketAbstractHandler* Handler_; -  -public:  + +public:      TSocketHandlerPtr()          : Handler_(nullptr)      {      } -  +      virtual ~TSocketHandlerPtr() {          delete Handler_;      } -  +      int Good() {          return (Handler_ && Handler_->Good());      } -  +      int Connect(const TAddrList& addrs, TDuration Timeout) {          return (Handler_) ? Handler_->Connect(addrs, Timeout) : 1; -    }  -  +    } +      void Disconnect() {          if (Handler_)              Handler_->Disconnect(); -    }  -  +    } +      void shutdown() {          if (Handler_)              Handler_->shutdown(); -    }  -  +    } +      bool send(const char* message, ssize_t messlen) {          return (Handler_) ? Handler_->send(message, messlen) : false; -    }  -  +    } +      virtual bool peek() {          return (Handler_) ? Handler_->peek() : false; -    }  -  +    } +      virtual ssize_t read(void* buffer, ssize_t buflen) {          return (Handler_) ? Handler_->read(buffer, buflen) : 0; -    }  -  +    } +      void setHandler(socketAbstractHandler* handler) {          if (Handler_)              delete Handler_;          Handler_ = handler; -    }  -};  -  -/********************************************************/  -// Here is httpAgent that uses socketAbstractHandler class  -// ant its derivatives  +    } +}; + +/********************************************************/ +// Here is httpAgent that uses socketAbstractHandler class +// ant its derivatives  using httpSpecialAgent = THttpAgent<TSocketHandlerPtr>; -  -/********************************************************/  -// Regular handler is used as implementation of  -// socketAbstractHandler for work through HTTP protocol  + +/********************************************************/ +// Regular handler is used as implementation of +// socketAbstractHandler for work through HTTP protocol  class socketRegularHandler: public socketAbstractHandler {  protected:      TSimpleSocketHandler Socket_; -  +  public:      socketRegularHandler()          : Socket_()      {      } -  +      bool Good() override {          return Socket_.Good(); -    }  -  +    } +      int Connect(const TAddrList& addrs, TDuration Timeout) override {          return Socket_.Connect(addrs, Timeout); -    }  -  +    } +      void Disconnect() override {          Socket_.Disconnect(); -    }  -  +    } +      void shutdown() override { -        //Do not block writing to socket  -        //There are servers that works in a bad way with this  -        //mSocket.shutdown();  -    }  -  +        //Do not block writing to socket +        //There are servers that works in a bad way with this +        //mSocket.shutdown(); +    } +      bool send(const char* message, ssize_t messlen) override {          return Socket_.send(message, messlen); -    }  -  +    } +      bool peek() override {          return Socket_.peek(); -    }  -  +    } +      ssize_t read(void* buffer, ssize_t buflen) override {          return Socket_.read(buffer, buflen); -    }  -};  -  -/********************************************************/  -// The base factory that allows to choose an appropriate  -// socketAbstractHandler implementation by url schema  +    } +}; + +/********************************************************/ +// The base factory that allows to choose an appropriate +// socketAbstractHandler implementation by url schema  class socketHandlerFactory {  public:      virtual ~socketHandlerFactory() {      } -  -    //returns mHandler_HTTP for correct HTTP-based url  + +    //returns mHandler_HTTP for correct HTTP-based url      virtual socketAbstractHandler* chooseHandler(const THttpURL& url); -  -    static socketHandlerFactory sInstance;  -};  -  -/********************************************************/  -// Section 2: the configurates tool to parse an HTTP-response  -/********************************************************/  -  + +    static socketHandlerFactory sInstance; +}; + +/********************************************************/ +// Section 2: the configurates tool to parse an HTTP-response +/********************************************************/ +  class httpAgentReader: public THttpParserGeneric<1> {  protected:      THttpAuthHeader Header_;      httpSpecialAgent& Agent_; -  +      char* Buffer_;      void* BufPtr_;      int BufSize_;      long BufRest_; -  -    void readBuf();  -  + +    void readBuf(); +      bool step() {          if (BufRest_ == 0) -            readBuf();  -        if (eof())  -            return false;  -        return true;  -    }  -  +            readBuf(); +        if (eof()) +            return false; +        return true; +    } +  public:      httpAgentReader(httpSpecialAgent& agent,                      const char* baseUrl,                      bool assumeConnectionClosed,                      bool use_auth = false,                      int bufSize = 0x1000); -  -    ~httpAgentReader();  -  + +    ~httpAgentReader(); +      bool eof() {          return BufRest_ < 0; -    }  -  +    } +      int error() {          return Header_.error; -    }  -  +    } +      void setError(int errCode) {          Header_.error = errCode; -    }  -  +    } +      const THttpAuthHeader* getAuthHeader() {          return &Header_; -    }  -  +    } +      const THttpHeader* readHeader();      long readPortion(void*& buf);      bool skipTheRest(); -};  -  -/********************************************************/  -// Section 3: the main class  -/********************************************************/  +}; + +/********************************************************/ +// Section 3: the main class +/********************************************************/  class httpLoadAgent: public httpSpecialAgent {  protected:      socketHandlerFactory& Factory_; @@ -232,76 +232,76 @@ protected:      int ErrCode_;      char* RealHost_;      httpDigestHandler Digest_; -  -    void clearReader();  + +    void clearReader();      bool doSetHost(const TAddrList& addrs); -    bool doStartRequest();  -  +    bool doStartRequest(); +  public:      httpLoadAgent(bool handleAuthorization = false,                    socketHandlerFactory& factory = socketHandlerFactory::sInstance); -    ~httpLoadAgent();  -  -    void setRealHost(const char* host);  +    ~httpLoadAgent(); + +    void setRealHost(const char* host);      void setIMS(const char* ifModifiedSince); -    void addHeaderInstruction(const char* instr);  -    void dropHeaderInstructions();  -  -    bool startRequest(const char* url,  +    void addHeaderInstruction(const char* instr); +    void dropHeaderInstructions(); + +    bool startRequest(const char* url,                        const char* url_to_merge = nullptr,                        bool persistent = false,                        const TAddrList& addrs = TAddrList()); -  +      // deprecated v4-only      bool startRequest(const char* url,                        const char* url_to_merge,                        bool persistent,                        ui32 ip); -    bool startRequest(const THttpURL& url,  +    bool startRequest(const THttpURL& url,                        bool persistent = false,                        const TAddrList& addrs = TAddrList()); -  +      bool setHost(const char* host_url,                   const TAddrList& addrs = TAddrList()); -  -    bool startOneRequest(const char* local_url);  -  + +    bool startOneRequest(const char* local_url); +      const THttpAuthHeader* getAuthHeader() {          if (Reader_ && Reader_->getAuthHeader()->use_auth)              return Reader_->getAuthHeader();          return nullptr; -    }  -  +    } +      const THttpHeader* getHeader() {          if (Reader_)              return Reader_->getAuthHeader();          return nullptr; -    }  -  +    } +      const THttpURL& getURL() {          return URL_; -    }  -  +    } +      bool eof() {          if (Reader_)              return Reader_->eof(); -        return true;  -    }  -  +        return true; +    } +      int error() {          if (ErrCode_)              return ErrCode_;          if (Reader_)              return Reader_->error(); -        return HTTP_BAD_URL;  -    }  -  +        return HTTP_BAD_URL; +    } +      long readPortion(void*& buf) {          if (Reader_)              return Reader_->readPortion(buf); -        return -1;  -    }  -};  -  -/********************************************************/  +        return -1; +    } +}; + +/********************************************************/ diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h index b6667070381..769828e4ae6 100644 --- a/library/cpp/http/fetch/httpparser.h +++ b/library/cpp/http/fetch/httpparser.h @@ -1,14 +1,14 @@  #pragma once -  -#include "httpfsm.h"  -#include "httpheader.h"  -  + +#include "httpfsm.h" +#include "httpheader.h" +  #include <library/cpp/mime/types/mime.h>  #include <util/system/yassert.h>  #include <library/cpp/http/misc/httpcodes.h>  template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20> -struct TFakeCheck {  +struct TFakeCheck {      bool Check(THttpHeader* /*header*/) {          return false;      } @@ -22,351 +22,351 @@ struct TFakeCheck {      size_t GetMaxBodySize(THttpHeader*) {          return bodymax;      } -};  -  -class THttpParserBase {  -public:  -    enum States {  -        hp_error,  -        hp_eof,  -        hp_in_header,  -        hp_read_alive,  -        hp_read_closed,  -        hp_begin_chunk_header,  -        hp_chunk_header,  -        hp_read_chunk  -    };  -  +}; + +class THttpParserBase { +public: +    enum States { +        hp_error, +        hp_eof, +        hp_in_header, +        hp_read_alive, +        hp_read_closed, +        hp_begin_chunk_header, +        hp_chunk_header, +        hp_read_chunk +    }; +      States GetState() {          return State;      } -  +      void setAssumeConnectionClosed(int value) { -        AssumeConnectionClosed = value;  -    }  -  +        AssumeConnectionClosed = value; +    } +      THttpHeader* GetHttpHeader() const {          return Header;      } -  -protected:  -    int CheckHeaders() {  + +protected: +    int CheckHeaders() {          if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) { -            Header->content_length = 0;  -            Header->transfer_chunked = 0;  -        }  -        if (Header->transfer_chunked < -1) {  -            Header->error = HTTP_BAD_ENCODING;  -            return 1;  -        } else if (Header->transfer_chunked == -1) {  -            Header->transfer_chunked = 0;  -        }  -        if (!Header->transfer_chunked && Header->content_length < -1) {  -            Header->error = HTTP_BAD_CONTENT_LENGTH;  -            return 1;  -        }  +            Header->content_length = 0; +            Header->transfer_chunked = 0; +        } +        if (Header->transfer_chunked < -1) { +            Header->error = HTTP_BAD_ENCODING; +            return 1; +        } else if (Header->transfer_chunked == -1) { +            Header->transfer_chunked = 0; +        } +        if (!Header->transfer_chunked && Header->content_length < -1) { +            Header->error = HTTP_BAD_CONTENT_LENGTH; +            return 1; +        }          if (Header->http_status == HTTP_OK) { -            if (Header->compression_method != HTTP_COMPRESSION_UNSET &&  -                Header->compression_method != HTTP_COMPRESSION_IDENTITY &&  -                Header->compression_method != HTTP_COMPRESSION_GZIP &&  +            if (Header->compression_method != HTTP_COMPRESSION_UNSET && +                Header->compression_method != HTTP_COMPRESSION_IDENTITY && +                Header->compression_method != HTTP_COMPRESSION_GZIP &&                  Header->compression_method != HTTP_COMPRESSION_DEFLATE)              { -                Header->error = HTTP_BAD_CONTENT_ENCODING;  -                return 1;  -            }  -        }  -        if (Header->connection_closed == -1)  -            Header->connection_closed = (Header->http_minor == 0 ||  -                                         AssumeConnectionClosed);  +                Header->error = HTTP_BAD_CONTENT_ENCODING; +                return 1; +            } +        } +        if (Header->connection_closed == -1) +            Header->connection_closed = (Header->http_minor == 0 || +                                         AssumeConnectionClosed);          if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) { -            Header->error = HTTP_LENGTH_UNKNOWN;  -            return 1;  -        }  -        if (Header->http_time < 0)  -            Header->http_time = 0;  -        if (Header->mime_type < 0)  -            Header->mime_type = MIME_UNKNOWN;  -        return 0;  -    }  -  -    THttpHeaderParser HeaderParser;  -    THttpChunkParser ChunkParser;  -    States State;  -    long ChunkSize;  +            Header->error = HTTP_LENGTH_UNKNOWN; +            return 1; +        } +        if (Header->http_time < 0) +            Header->http_time = 0; +        if (Header->mime_type < 0) +            Header->mime_type = MIME_UNKNOWN; +        return 0; +    } + +    THttpHeaderParser HeaderParser; +    THttpChunkParser ChunkParser; +    States State; +    long ChunkSize;      THttpHeader* Header; -    int AssumeConnectionClosed;  -    bool HeadRequest;  -};  -  +    int AssumeConnectionClosed; +    bool HeadRequest; +}; +  template <int isReader, typename TCheck = TFakeCheck<>> -class THttpParserGeneric: public THttpParserBase, public TCheck {  -protected:  +class THttpParserGeneric: public THttpParserBase, public TCheck { +protected:      long ParseGeneric(void*& buf, long& size) { -        if (!size) {  -            switch (State) {  -                case hp_error:  -                case hp_eof:  -                    break;  -                case hp_read_closed:  -                    State = hp_eof;  -                    break;  -                case hp_in_header:  -                    Header->error = HTTP_HEADER_EOF;  -                    State = hp_error;  -                    break;  -                case hp_read_alive:  -                case hp_read_chunk:  -                    if (HeadRequest)  -                        State = hp_eof;  -                    else {  -                        Header->error = HTTP_MESSAGE_EOF;  -                        State = hp_error;  -                    }  -                    break;  -                case hp_begin_chunk_header:  -                case hp_chunk_header:  -                    if (HeadRequest)  -                        State = hp_eof;  -                    else {  -                        Header->error = HTTP_CHUNK_EOF;  -                        State = hp_error;  -                    }  -                    break;  -            }  -            return 0;  -        }  -        while (size) {  -            int ret;  -  -            switch (State) {  -                case hp_error:  -                    return 0;  -  -                case hp_eof:  -                    return 0;  -  -                case hp_in_header:  -                    if ((ret = HeaderParser.Execute(buf, size)) < 0) {  -                        Header->error = HTTP_BAD_HEADER_STRING;  -                        State = hp_error;  -                        return 0;  -                    } else if (ret == 2) {  -                        Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1);  -                        size -= long(HeaderParser.lastchar - (char*)buf + 1);  -                        buf = HeaderParser.lastchar + 1;  -                        State = CheckHeaders() ? hp_error  +        if (!size) { +            switch (State) { +                case hp_error: +                case hp_eof: +                    break; +                case hp_read_closed: +                    State = hp_eof; +                    break; +                case hp_in_header: +                    Header->error = HTTP_HEADER_EOF; +                    State = hp_error; +                    break; +                case hp_read_alive: +                case hp_read_chunk: +                    if (HeadRequest) +                        State = hp_eof; +                    else { +                        Header->error = HTTP_MESSAGE_EOF; +                        State = hp_error; +                    } +                    break; +                case hp_begin_chunk_header: +                case hp_chunk_header: +                    if (HeadRequest) +                        State = hp_eof; +                    else { +                        Header->error = HTTP_CHUNK_EOF; +                        State = hp_error; +                    } +                    break; +            } +            return 0; +        } +        while (size) { +            int ret; + +            switch (State) { +                case hp_error: +                    return 0; + +                case hp_eof: +                    return 0; + +                case hp_in_header: +                    if ((ret = HeaderParser.Execute(buf, size)) < 0) { +                        Header->error = HTTP_BAD_HEADER_STRING; +                        State = hp_error; +                        return 0; +                    } else if (ret == 2) { +                        Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1); +                        size -= long(HeaderParser.lastchar - (char*)buf + 1); +                        buf = HeaderParser.lastchar + 1; +                        State = CheckHeaders() ? hp_error                                                 : Header->transfer_chunked ? hp_begin_chunk_header                                                                            : Header->content_length == 0 ? hp_eof                                                                                                          : Header->content_length > 0 ? hp_read_alive                                                                                                                                       : hp_read_closed; -                        if (State == hp_begin_chunk_header) {  -                            // unget \n for chunk reader  +                        if (State == hp_begin_chunk_header) { +                            // unget \n for chunk reader                              buf = (char*)buf - 1; -                            size++;  -                        }  -                        if (isReader)  -                            return size;  -                    } else {  -                        Header->header_size += size;  -                        size = 0;  -                    }  -                    break;  -  -                case hp_read_alive:  -                    Header->entity_size += size;  +                            size++; +                        } +                        if (isReader) +                            return size; +                    } else { +                        Header->header_size += size; +                        size = 0; +                    } +                    break; + +                case hp_read_alive: +                    Header->entity_size += size;                      if (Header->entity_size >= Header->content_length) { -                        State = hp_eof;  -                    }  +                        State = hp_eof; +                    } -                    TCheck::CheckDocPart(buf, size, Header);  -                    if (isReader)  -                        return size;  -                    size = 0;  -                    break;  -  -                case hp_read_closed:  -                    Header->entity_size += size;  -                    TCheck::CheckDocPart(buf, size, Header);  -                    if (isReader)  -                        return size;  -                    size = 0;  -                    break;  -  -                case hp_begin_chunk_header:  -                    ChunkParser.Init();  -                    State = hp_chunk_header;  +                    TCheck::CheckDocPart(buf, size, Header); +                    if (isReader) +                        return size; +                    size = 0; +                    break; + +                case hp_read_closed: +                    Header->entity_size += size; +                    TCheck::CheckDocPart(buf, size, Header); +                    if (isReader) +                        return size; +                    size = 0; +                    break; + +                case hp_begin_chunk_header: +                    ChunkParser.Init(); +                    State = hp_chunk_header;                      [[fallthrough]]; -  -                case hp_chunk_header:  -                    if ((ret = ChunkParser.Execute(buf, size)) < 0) {  -                        Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK);  -                        State = hp_error;  -                        return 0;  -                    } else if (ret == 2) {  -                        Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1);  -                        size -= long(ChunkParser.lastchar - (char*)buf + 1);  -                        buf = ChunkParser.lastchar + 1;  -                        ChunkSize = ChunkParser.chunk_length;  + +                case hp_chunk_header: +                    if ((ret = ChunkParser.Execute(buf, size)) < 0) { +                        Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK); +                        State = hp_error; +                        return 0; +                    } else if (ret == 2) { +                        Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1); +                        size -= long(ChunkParser.lastchar - (char*)buf + 1); +                        buf = ChunkParser.lastchar + 1; +                        ChunkSize = ChunkParser.chunk_length;                          Y_ASSERT(ChunkSize >= 0); -                        State = ChunkSize ? hp_read_chunk : hp_eof;  -                    } else {  -                        Header->entity_size += size;  -                        size = 0;  -                    }  -                    break;  -  -                case hp_read_chunk:  -                    if (size >= ChunkSize) {  -                        Header->entity_size += ChunkSize;  -                        State = hp_begin_chunk_header;  -                        TCheck::CheckDocPart(buf, ChunkSize, Header);  -                        if (isReader)  -                            return ChunkSize;  -                        size -= ChunkSize;  +                        State = ChunkSize ? hp_read_chunk : hp_eof; +                    } else { +                        Header->entity_size += size; +                        size = 0; +                    } +                    break; + +                case hp_read_chunk: +                    if (size >= ChunkSize) { +                        Header->entity_size += ChunkSize; +                        State = hp_begin_chunk_header; +                        TCheck::CheckDocPart(buf, ChunkSize, Header); +                        if (isReader) +                            return ChunkSize; +                        size -= ChunkSize;                          buf = (char*)buf + ChunkSize; -                    } else {  -                        Header->entity_size += size;  -                        ChunkSize -= size;  -                        TCheck::CheckDocPart(buf, size, Header);  -                        if (isReader)  -                            return size;  -                        size = 0;  -                    }  +                    } else { +                        Header->entity_size += size; +                        ChunkSize -= size; +                        TCheck::CheckDocPart(buf, size, Header); +                        if (isReader) +                            return size; +                        size = 0; +                    }                      break; -            }  -        }  -        return size;  -    }  -};  -  +            } +        } +        return size; +    } +}; +  template <class TCheck = TFakeCheck<>> -class THttpParser: public THttpParserGeneric<0, TCheck> {  -    typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG!  -public:  +class THttpParser: public THttpParserGeneric<0, TCheck> { +    typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG! +public:      void Init(THttpHeader* H, bool head_request = false) { -        TBaseT::Header = H;  -        TBaseT::HeaderParser.Init(TBaseT::Header);  -        TBaseT::State = TBaseT::hp_in_header;  -        TBaseT::AssumeConnectionClosed = 0;  -        TBaseT::HeadRequest = head_request;  -    }  -  +        TBaseT::Header = H; +        TBaseT::HeaderParser.Init(TBaseT::Header); +        TBaseT::State = TBaseT::hp_in_header; +        TBaseT::AssumeConnectionClosed = 0; +        TBaseT::HeadRequest = head_request; +    } +      void Parse(void* buf, long size) {          TBaseT::ParseGeneric(buf, size); -    }  -};  -  -class TMemoReader {  -public:  +    } +}; + +class TMemoReader { +public:      int Init(void* buf, long bufsize) { -        Buf = buf;  -        Bufsize = bufsize;  -        return 0;  -    }  +        Buf = buf; +        Bufsize = bufsize; +        return 0; +    }      long Read(void*& buf) {          Y_ASSERT(Bufsize >= 0); -        if (!Bufsize) {  -            Bufsize = -1;  -            return 0;  -        }  -        buf = Buf;  -        long ret = Bufsize;  -        Bufsize = 0;  -        return ret;  -    }  -  -protected:  -    long Bufsize;  +        if (!Bufsize) { +            Bufsize = -1; +            return 0; +        } +        buf = Buf; +        long ret = Bufsize; +        Bufsize = 0; +        return ret; +    } + +protected: +    long Bufsize;      void* Buf; -};  -  -template <class Reader>  -class THttpReader: public THttpParserGeneric<1>, public Reader {  -    typedef THttpParserGeneric<1> TBaseT;  +}; + +template <class Reader> +class THttpReader: public THttpParserGeneric<1>, public Reader { +    typedef THttpParserGeneric<1> TBaseT; -public:  +public:      using TBaseT::AssumeConnectionClosed; -    using TBaseT::Header;  -    using TBaseT::ParseGeneric;  -    using TBaseT::State;  -  +    using TBaseT::Header; +    using TBaseT::ParseGeneric; +    using TBaseT::State; +      int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) { -        Header = H;  -        Eoferr = 1;  -        Size = 0;  +        Header = H; +        Eoferr = 1; +        Size = 0;          AssumeConnectionClosed = assumeConnectionClosed;          HeadRequest = headRequest; -        return parsHeader ? ParseHeader() : SkipHeader();  -    }  -  +        return parsHeader ? ParseHeader() : SkipHeader(); +    } +      long Read(void*& buf) { -        long Chunk;  -        do {  -            if (!Size) {  -                if (Eoferr != 1)  -                    return Eoferr;  -                else if ((Size = (long)Reader::Read(Ptr)) < 0) {  -                    Header->error = HTTP_CONNECTION_LOST;  -                    return Eoferr = -1;  -                }  -            }  +        long Chunk; +        do { +            if (!Size) { +                if (Eoferr != 1) +                    return Eoferr; +                else if ((Size = (long)Reader::Read(Ptr)) < 0) { +                    Header->error = HTTP_CONNECTION_LOST; +                    return Eoferr = -1; +                } +            }              Chunk = ParseGeneric(Ptr, Size); -            buf = Ptr;  -            Ptr = (char*)Ptr + Chunk;  -            Size -= Chunk;  +            buf = Ptr; +            Ptr = (char*)Ptr + Chunk; +            Size -= Chunk;              if (State == hp_eof) {                  Size = 0; -                Eoferr = 0;  +                Eoferr = 0;              } else if (State == hp_error) -                return Eoferr = -1;  -        } while (!Chunk);  -        return Chunk;  -    }  -  -protected:  -    int ParseHeader() {  -        HeaderParser.Init(Header);  -        State = hp_in_header;  -        while (State == hp_in_header) {  -            if ((Size = (long)Reader::Read(Ptr)) < 0)  -                return Eoferr = -1;  +                return Eoferr = -1; +        } while (!Chunk); +        return Chunk; +    } + +protected: +    int ParseHeader() { +        HeaderParser.Init(Header); +        State = hp_in_header; +        while (State == hp_in_header) { +            if ((Size = (long)Reader::Read(Ptr)) < 0) +                return Eoferr = -1;              ParseGeneric(Ptr, Size); -        }  -        if (State == hp_error)  -            return Eoferr = -1;  -        if (State == hp_eof)  -            Eoferr = 0;  -        return 0;  -    }  -  -    int SkipHeader() {  -        long hdrsize = Header->header_size;  -        while (hdrsize) {  -            if ((Size = (long)Reader::Read(Ptr)) <= 0)  -                return Eoferr = -1;  -            if (Size >= hdrsize) {  -                Size -= hdrsize;  -                Ptr = (char*)Ptr + hdrsize;  -                break;  -            }  -            hdrsize -= Size;  -        }  -        State = Header->transfer_chunked ? hp_begin_chunk_header  +        } +        if (State == hp_error) +            return Eoferr = -1; +        if (State == hp_eof) +            Eoferr = 0; +        return 0; +    } + +    int SkipHeader() { +        long hdrsize = Header->header_size; +        while (hdrsize) { +            if ((Size = (long)Reader::Read(Ptr)) <= 0) +                return Eoferr = -1; +            if (Size >= hdrsize) { +                Size -= hdrsize; +                Ptr = (char*)Ptr + hdrsize; +                break; +            } +            hdrsize -= Size; +        } +        State = Header->transfer_chunked ? hp_begin_chunk_header                                           : Header->content_length == 0 ? hp_eof                                                                         : Header->content_length > 0 ? hp_read_alive                                                                                                      : hp_read_closed; -        Header->entity_size = 0;  -        if (State == hp_eof)  -            Eoferr = 0;  -        else if (State == hp_begin_chunk_header) {  -            // unget \n for chunk reader  -            Ptr = (char*)Ptr - 1;  -            ++Size;  -        }  -        return 0;  -    }  -  +        Header->entity_size = 0; +        if (State == hp_eof) +            Eoferr = 0; +        else if (State == hp_begin_chunk_header) { +            // unget \n for chunk reader +            Ptr = (char*)Ptr - 1; +            ++Size; +        } +        return 0; +    } +      void* Ptr; -    long Size;  +    long Size;      int Eoferr; -};  +}; diff --git a/library/cpp/http/fetch/httpzreader.h b/library/cpp/http/fetch/httpzreader.h index d951d21e9a9..68eb00853d6 100644 --- a/library/cpp/http/fetch/httpzreader.h +++ b/library/cpp/http/fetch/httpzreader.h @@ -1,55 +1,55 @@  #pragma once -  +  #include "httpheader.h"  #include "httpparser.h"  #include "exthttpcodes.h" -  +  #include <util/system/defaults.h>  #include <util/generic/yexception.h> -#include <contrib/libs/zlib/zlib.h>  -  +#include <contrib/libs/zlib/zlib.h> +  #include <errno.h> -#ifndef ENOTSUP  +#ifndef ENOTSUP  #define ENOTSUP 45 -#endif  -  -template <class Reader>  -class TCompressedHttpReader: public THttpReader<Reader> {  -    typedef THttpReader<Reader> TBase;  +#endif + +template <class Reader> +class TCompressedHttpReader: public THttpReader<Reader> { +    typedef THttpReader<Reader> TBase; -public:  +public:      using TBase::AssumeConnectionClosed; -    using TBase::Header;  -    using TBase::ParseGeneric;  -    using TBase::State;  -  +    using TBase::Header; +    using TBase::ParseGeneric; +    using TBase::State; +      static constexpr size_t DefaultBufSize = 64 << 10;      static constexpr unsigned int DefaultWinSize = 15; -    TCompressedHttpReader()  -        : CompressedInput(false)  -        , BufSize(0)  -        , CurContSize(0)  -        , MaxContSize(0)  +    TCompressedHttpReader() +        : CompressedInput(false) +        , BufSize(0) +        , CurContSize(0) +        , MaxContSize(0)          , Buf(nullptr) -        , ZErr(0)  +        , ZErr(0)          , ConnectionClosed(0)          , IgnoreTrailingGarbage(true) -    {  -        memset(&Stream, 0, sizeof(Stream));  -    }  -  -    ~TCompressedHttpReader() {  -        ClearStream();  -  -        if (Buf) {  -            free(Buf);  +    { +        memset(&Stream, 0, sizeof(Stream)); +    } + +    ~TCompressedHttpReader() { +        ClearStream(); + +        if (Buf) { +            free(Buf);              Buf = nullptr; -        }  -    }  -  +        } +    } +      void SetConnectionClosed(int cc) {          ConnectionClosed = cc;      } @@ -66,196 +66,196 @@ public:          const unsigned int winSize = DefaultWinSize,          bool headRequest = false)      { -        ZErr = 0;  -        CurContSize = 0;  -        MaxContSize = maxContSize;  -  +        ZErr = 0; +        CurContSize = 0; +        MaxContSize = maxContSize; +          int ret = TBase::Init(H, parsHeader, ConnectionClosed, headRequest); -        if (ret)  -            return ret;  -  -        ret = SetCompression(H->compression_method, bufSize, winSize);  -        return ret;  -    }  -  +        if (ret) +            return ret; + +        ret = SetCompression(H->compression_method, bufSize, winSize); +        return ret; +    } +      long Read(void*& buf) { -        if (!CompressedInput) {  -            long res = TBase::Read(buf);  -            if (res > 0) {  -                CurContSize += (size_t)res;  -                if (CurContSize > MaxContSize) {  -                    ZErr = E2BIG;  -                    return -1;  -                }  -            }  -            return res;  -        }  -  -        while (true) {  -            if (Stream.avail_in == 0) {  +        if (!CompressedInput) { +            long res = TBase::Read(buf); +            if (res > 0) { +                CurContSize += (size_t)res; +                if (CurContSize > MaxContSize) { +                    ZErr = E2BIG; +                    return -1; +                } +            } +            return res; +        } + +        while (true) { +            if (Stream.avail_in == 0) {                  void* tmpin = Stream.next_in;                  long res = TBase::Read(tmpin);                  Stream.next_in = (Bytef*)tmpin;                  if (res <= 0)                      return res;                  Stream.avail_in = (uInt)res; -            }  -  -            Stream.next_out = Buf;  -            Stream.avail_out = (uInt)BufSize;  -            buf = Buf;  -  -            int err = inflate(&Stream, Z_SYNC_FLUSH);  -  +            } + +            Stream.next_out = Buf; +            Stream.avail_out = (uInt)BufSize; +            buf = Buf; + +            int err = inflate(&Stream, Z_SYNC_FLUSH); +              //Y_ASSERT(Stream.avail_in == 0); -  -            switch (err) {  -                case Z_OK:  -                    // there is no data in next_out yet  -                    if (BufSize == Stream.avail_out)  -                        continue;  + +            switch (err) { +                case Z_OK: +                    // there is no data in next_out yet +                    if (BufSize == Stream.avail_out) +                        continue;                      [[fallthrough]]; // don't break or return; continue with Z_STREAM_END case -  -                case Z_STREAM_END:  -                    if (Stream.total_out > MaxContSize) {  -                        ZErr = E2BIG;  -                        return -1;  -                    }  + +                case Z_STREAM_END: +                    if (Stream.total_out > MaxContSize) { +                        ZErr = E2BIG; +                        return -1; +                    }                      if (!IgnoreTrailingGarbage && BufSize == Stream.avail_out && Stream.avail_in > 0) {                          Header->error = EXT_HTTP_GZIPERROR;                          ZErr = EFAULT;                          Stream.msg = (char*)"trailing garbage";                          return -1;                      } -                    return long(BufSize - Stream.avail_out);  -  -                case Z_NEED_DICT:  -                case Z_DATA_ERROR:  -                    Header->error = EXT_HTTP_GZIPERROR;  -                    ZErr = EFAULT;  -                    return -1;  -  -                case Z_MEM_ERROR:  -                    ZErr = ENOMEM;  -                    return -1;  -  -                default:  -                    ZErr = EINVAL;  -                    return -1;  -            }  -        }  -  -        return -1;  -    }  -  +                    return long(BufSize - Stream.avail_out); + +                case Z_NEED_DICT: +                case Z_DATA_ERROR: +                    Header->error = EXT_HTTP_GZIPERROR; +                    ZErr = EFAULT; +                    return -1; + +                case Z_MEM_ERROR: +                    ZErr = ENOMEM; +                    return -1; + +                default: +                    ZErr = EINVAL; +                    return -1; +            } +        } + +        return -1; +    } +      const char* ZMsg() const { -        return Stream.msg;  -    }  -  -    int ZError() const {  -        return ZErr;  -    }  -  -    size_t GetCurContSize() const {  -        return CompressedInput ? Stream.total_out : CurContSize;  -    }  -  -protected:  -    int SetCompression(const int compression, const size_t bufSize,  -                       const unsigned int winSize) {  -        ClearStream();  -  -        int winsize = winSize;  -        switch ((enum HTTP_COMPRESSION)compression) {  -            case HTTP_COMPRESSION_UNSET:  -            case HTTP_COMPRESSION_IDENTITY:  -                CompressedInput = false;  -                return 0;  -            case HTTP_COMPRESSION_GZIP:  -                CompressedInput = true;  +        return Stream.msg; +    } + +    int ZError() const { +        return ZErr; +    } + +    size_t GetCurContSize() const { +        return CompressedInput ? Stream.total_out : CurContSize; +    } + +protected: +    int SetCompression(const int compression, const size_t bufSize, +                       const unsigned int winSize) { +        ClearStream(); + +        int winsize = winSize; +        switch ((enum HTTP_COMPRESSION)compression) { +            case HTTP_COMPRESSION_UNSET: +            case HTTP_COMPRESSION_IDENTITY: +                CompressedInput = false; +                return 0; +            case HTTP_COMPRESSION_GZIP: +                CompressedInput = true;                  winsize += 16; // 16 indicates gzip, see zlib.h -                break;  -            case HTTP_COMPRESSION_DEFLATE:  -                CompressedInput = true;  -                winsize = -winsize; // negative indicates raw deflate stream, see zlib.h  -                break;  -            case HTTP_COMPRESSION_COMPRESS:  -            case HTTP_COMPRESSION_ERROR:  -            default:  -                CompressedInput = false;  -                ZErr = ENOTSUP;  -                return -1;  -        }  -  -        if (bufSize != BufSize) {  -            if (Buf)  -                free(Buf);  -            Buf = (ui8*)malloc(bufSize);  -            if (!Buf) {  -                ZErr = ENOMEM;  -                return -1;  -            }  -            BufSize = bufSize;  -        }  -  -        int err = inflateInit2(&Stream, winsize);  -        switch (err) {  -            case Z_OK:  -                Stream.total_in = 0;  -                Stream.total_out = 0;  -                Stream.avail_in = 0;  -                return 0;  -  -            case Z_DATA_ERROR: // never happens, see zlib.h  -                CompressedInput = false;  -                ZErr = EFAULT;  -                return -1;  -  -            case Z_MEM_ERROR:  -                CompressedInput = false;  -                ZErr = ENOMEM;  -                return -1;  -  -            default:  -                CompressedInput = false;  -                ZErr = EINVAL;  -                return -1;  -        }  -    }  -  -    void ClearStream() {  -        if (CompressedInput) {  -            inflateEnd(&Stream);  -            CompressedInput = false;  -        }  -    }  -  -    z_stream Stream;  -    bool CompressedInput;  -    size_t BufSize;  -    size_t CurContSize, MaxContSize;  -    ui8* Buf;  -    int ZErr;  +                break; +            case HTTP_COMPRESSION_DEFLATE: +                CompressedInput = true; +                winsize = -winsize; // negative indicates raw deflate stream, see zlib.h +                break; +            case HTTP_COMPRESSION_COMPRESS: +            case HTTP_COMPRESSION_ERROR: +            default: +                CompressedInput = false; +                ZErr = ENOTSUP; +                return -1; +        } + +        if (bufSize != BufSize) { +            if (Buf) +                free(Buf); +            Buf = (ui8*)malloc(bufSize); +            if (!Buf) { +                ZErr = ENOMEM; +                return -1; +            } +            BufSize = bufSize; +        } + +        int err = inflateInit2(&Stream, winsize); +        switch (err) { +            case Z_OK: +                Stream.total_in = 0; +                Stream.total_out = 0; +                Stream.avail_in = 0; +                return 0; + +            case Z_DATA_ERROR: // never happens, see zlib.h +                CompressedInput = false; +                ZErr = EFAULT; +                return -1; + +            case Z_MEM_ERROR: +                CompressedInput = false; +                ZErr = ENOMEM; +                return -1; + +            default: +                CompressedInput = false; +                ZErr = EINVAL; +                return -1; +        } +    } + +    void ClearStream() { +        if (CompressedInput) { +            inflateEnd(&Stream); +            CompressedInput = false; +        } +    } + +    z_stream Stream; +    bool CompressedInput; +    size_t BufSize; +    size_t CurContSize, MaxContSize; +    ui8* Buf; +    int ZErr;      int ConnectionClosed;      bool IgnoreTrailingGarbage; -};  -  +}; +  class zlib_exception: public yexception { -};  -  -template <class Reader>  -class SCompressedHttpReader: public TCompressedHttpReader<Reader> {  -    typedef TCompressedHttpReader<Reader> TBase;  - -public:  -    using TBase::ZError;  -    using TBase::ZMsg;  -  -    SCompressedHttpReader()  +}; + +template <class Reader> +class SCompressedHttpReader: public TCompressedHttpReader<Reader> { +    typedef TCompressedHttpReader<Reader> TBase; + +public: +    using TBase::ZError; +    using TBase::ZMsg; + +    SCompressedHttpReader()          : TBase()      {      } -  +      int Init(          THttpHeader* H,          int parsHeader, @@ -265,31 +265,31 @@ public:          bool headRequest = false)      {          int ret = TBase::Init(H, parsHeader, maxContSize, bufSize, winSize, headRequest); -        return (int)HandleRetValue((long)ret);  -    }  -  +        return (int)HandleRetValue((long)ret); +    } +      long Read(void*& buf) { -        long ret = TBase::Read(buf);  -        return HandleRetValue(ret);  -    }  -  -protected:  -    long HandleRetValue(long ret) {  -        switch (ZError()) {  -            case 0:  -                return ret;  -            case ENOMEM:  +        long ret = TBase::Read(buf); +        return HandleRetValue(ret); +    } + +protected: +    long HandleRetValue(long ret) { +        switch (ZError()) { +            case 0: +                return ret; +            case ENOMEM:                  ythrow yexception() << "SCompressedHttpReader: not enough memory"; -            case EINVAL:  +            case EINVAL:                  ythrow yexception() << "SCompressedHttpReader: zlib error: " << ZMsg(); -            case ENOTSUP:  +            case ENOTSUP:                  ythrow yexception() << "SCompressedHttpReader: unsupported compression method"; -            case EFAULT:  +            case EFAULT:                  ythrow zlib_exception() << "SCompressedHttpReader: " << ZMsg(); -            case E2BIG:  +            case E2BIG:                  ythrow zlib_exception() << "SCompressedHttpReader: Content exceeds maximum length"; -            default:  +            default:                  ythrow yexception() << "SCompressedHttpReader: unknown error"; -        }  -    }  -};  +        } +    } +}; diff --git a/library/cpp/http/fetch/ya.make b/library/cpp/http/fetch/ya.make index aa8b073a8ca..77371274639 100644 --- a/library/cpp/http/fetch/ya.make +++ b/library/cpp/http/fetch/ya.make @@ -1,5 +1,5 @@  LIBRARY() -  +  OWNER(      g:zora  ) @@ -14,25 +14,25 @@ PEERDIR(      library/cpp/uri  ) -SRCS(  -    http_digest.cpp  -    http_socket.cpp  +SRCS( +    http_digest.cpp +    http_socket.cpp      httpheader.cpp -    httpload.cpp  -    exthttpcodes.cpp  +    httpload.cpp +    exthttpcodes.cpp      httpfsm.rl6 -    httpagent.h  -    httpfetcher.h  -    httpheader.h  -    httpparser.h  -    httpzreader.h  +    httpagent.h +    httpfetcher.h +    httpheader.h +    httpparser.h +    httpzreader.h      sockhandler.h -)  -  +) +  GENERATE_ENUM_SERIALIZATION(httpheader.h)  SET(RAGEL6_FLAGS -CF1) -END()  +END()  RECURSE_FOR_TESTS(ut)  | 
