diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/http | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/http')
71 files changed, 11612 insertions, 0 deletions
diff --git a/library/cpp/http/fetch/exthttpcodes.cpp b/library/cpp/http/fetch/exthttpcodes.cpp new file mode 100644 index 0000000000..acc05650c8 --- /dev/null +++ b/library/cpp/http/fetch/exthttpcodes.cpp @@ -0,0 +1,266 @@ +#include "exthttpcodes.h" + +#include <cstring> + +const ui16 CrazyServer = ShouldDelete | MarkSuspect; + +struct http_flag { + ui16 http; + ui16 flag; +}; +static http_flag HTTP_FLAG[] = { + {HTTP_CONTINUE, MarkSuspect}, // 100 + {HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101 + {HTTP_PROCESSING, CrazyServer}, // 102 + + {HTTP_OK, ShouldReindex}, // 200 + {HTTP_CREATED, CrazyServer}, // 201 + {HTTP_ACCEPTED, ShouldDelete}, // 202 + {HTTP_NON_AUTHORITATIVE_INFORMATION, ShouldReindex}, // 203 + {HTTP_NO_CONTENT, ShouldDelete}, // 204 + {HTTP_RESET_CONTENT, ShouldDelete}, // 205 + {HTTP_PARTIAL_CONTENT, ShouldReindex}, // 206 + {HTTP_MULTI_STATUS, CrazyServer}, // 207 + {HTTP_ALREADY_REPORTED, CrazyServer}, // 208 + {HTTP_IM_USED, CrazyServer}, // 226 + + {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete}, // 300 + {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir}, // 301 + {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir}, // 302 + {HTTP_SEE_OTHER, CheckLocation | ShouldDelete | MoveRedir}, // 303 + {HTTP_NOT_MODIFIED, 0}, // 304 + {HTTP_USE_PROXY, ShouldDelete}, // 305 + {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307 + {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308 + + {HTTP_BAD_REQUEST, CrazyServer}, // 400 + {HTTP_UNAUTHORIZED, ShouldDelete}, // 401 + {HTTP_PAYMENT_REQUIRED, ShouldDelete}, // 402 + {HTTP_FORBIDDEN, ShouldDelete}, // 403 + {HTTP_NOT_FOUND, ShouldDelete}, // 404 + {HTTP_METHOD_NOT_ALLOWED, ShouldDelete}, // 405 + {HTTP_NOT_ACCEPTABLE, ShouldDelete}, // 406 + {HTTP_PROXY_AUTHENTICATION_REQUIRED, CrazyServer}, // 407 + {HTTP_REQUEST_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 408 + {HTTP_CONFLICT, MarkSuspect}, // 409 + {HTTP_GONE, 
ShouldDelete}, // 410 + {HTTP_LENGTH_REQUIRED, CrazyServer}, // 411 + {HTTP_PRECONDITION_FAILED, CrazyServer}, // 412 + {HTTP_REQUEST_ENTITY_TOO_LARGE, CrazyServer}, // 413 + {HTTP_REQUEST_URI_TOO_LARGE, ShouldDelete}, // 414 + {HTTP_UNSUPPORTED_MEDIA_TYPE, CrazyServer}, // 415 + {HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, CrazyServer}, // 416 + {HTTP_EXPECTATION_FAILED, ShouldDelete}, // 417 + {HTTP_I_AM_A_TEAPOT, CrazyServer}, // 418 + {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete}, // 419 + + {HTTP_MISDIRECTED_REQUEST, CrazyServer}, // 421 + {HTTP_UNPROCESSABLE_ENTITY, CrazyServer}, // 422 + {HTTP_LOCKED, ShouldDelete}, // 423 + {HTTP_FAILED_DEPENDENCY, CrazyServer}, // 424 + {HTTP_UPGRADE_REQUIRED, ShouldDelete}, // 426 + {HTTP_PRECONDITION_REQUIRED, ShouldDelete}, // 428 + {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429 + {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete}, // 451 + + {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect}, // 500 + {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect}, // 501 + {HTTP_BAD_GATEWAY, MarkSuspect}, // 502 + {HTTP_SERVICE_UNAVAILABLE, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 503 + {HTTP_GATEWAY_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 504 + {HTTP_HTTP_VERSION_NOT_SUPPORTED, CrazyServer | ShouldDisconnect}, // 505 + + {HTTP_VARIANT_ALSO_NEGOTIATES, CrazyServer | ShouldDisconnect}, // 506 + {HTTP_INSUFFICIENT_STORAGE, CrazyServer | ShouldDisconnect}, // 507 + {HTTP_LOOP_DETECTED, CrazyServer | ShouldDisconnect}, // 508 + {HTTP_BANDWIDTH_LIMIT_EXCEEDED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 509 + {HTTP_NOT_EXTENDED, ShouldDelete}, // 510 + {HTTP_NETWORK_AUTHENTICATION_REQUIRED, ShouldDelete}, // 511 + + // custom + {HTTP_BAD_RESPONSE_HEADER, CrazyServer}, // 1000 + {HTTP_CONNECTION_LOST, ShouldRetry}, // 1001 + {HTTP_BODY_TOO_LARGE, ShouldDelete | CanBeFake}, // 1002 + {HTTP_ROBOTS_TXT_DISALLOW, ShouldDelete}, // 1003 + {HTTP_BAD_URL, ShouldDelete}, // 1004 + 
{HTTP_BAD_MIME, ShouldDelete}, // 1005 + {HTTP_DNS_FAILURE, ShouldDisconnect | MarkSuspect}, // 1006 + {HTTP_BAD_STATUS_CODE, CrazyServer}, // 1007 + {HTTP_BAD_HEADER_STRING, CrazyServer}, // 1008 + {HTTP_BAD_CHUNK, CrazyServer}, // 1009 + {HTTP_CONNECT_FAILED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 1010 + {HTTP_FILTER_DISALLOW, ShouldDelete}, // 1011 + {HTTP_LOCAL_EIO, ShouldRetry}, // 1012 + {HTTP_BAD_CONTENT_LENGTH, ShouldDelete}, // 1013 + {HTTP_BAD_ENCODING, ShouldDelete}, // 1014 + {HTTP_LENGTH_UNKNOWN, ShouldDelete}, // 1015 + {HTTP_HEADER_EOF, ShouldRetry | CanBeFake}, // 1016 + {HTTP_MESSAGE_EOF, ShouldRetry | CanBeFake}, // 1017 + {HTTP_CHUNK_EOF, ShouldRetry | CanBeFake}, // 1018 + {HTTP_PAST_EOF, ShouldRetry | ShouldDelete | CanBeFake}, // 1019 + {HTTP_HEADER_TOO_LARGE, ShouldDelete}, // 1020 + {HTTP_URL_TOO_LARGE, ShouldDelete}, // 1021 + {HTTP_INTERRUPTED, 0}, // 1022 + {HTTP_CUSTOM_NOT_MODIFIED, 0}, // 1023 + {HTTP_BAD_CONTENT_ENCODING, ShouldDelete}, // 1024 + {HTTP_PROXY_UNKNOWN, 0}, // 1030 + {HTTP_PROXY_REQUEST_TIME_OUT, 0}, // 1031 + {HTTP_PROXY_INTERNAL_ERROR, 0}, // 1032 + {HTTP_PROXY_CONNECT_FAILED, 0}, // 1033 + {HTTP_PROXY_CONNECTION_LOST, 0}, // 1034 + {HTTP_PROXY_NO_PROXY, 0}, // 1035 + {HTTP_PROXY_ERROR, 0}, // 1036 + {HTTP_SSL_ERROR, 0}, // 1037 + {HTTP_CACHED_COPY_NOT_FOUND, 0}, // 1038 + {HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, ShouldRetry}, // 1039 + {HTTP_FETCHER_BAD_RESPONSE, 0}, // 1040 + {HTTP_FETCHER_MB_ERROR, 0}, // 1041 + {HTTP_SSL_CERT_ERROR, 0}, // 1042 + + // Custom (replace HTTP 200/304) + {EXT_HTTP_MIRRMOVE, 0}, // 2000 + {EXT_HTTP_MANUAL_DELETE, ShouldDelete}, // 2001 + {EXT_HTTP_NOTUSED2, ShouldDelete}, // 2002 + {EXT_HTTP_NOTUSED3, ShouldDelete}, // 2003 + {EXT_HTTP_REFRESH, ShouldDelete | CheckLinks | MoveRedir}, // 2004 + {EXT_HTTP_NOINDEX, ShouldDelete | CheckLinks}, // 2005 + {EXT_HTTP_BADCODES, ShouldDelete}, // 2006 + {EXT_HTTP_SITESTAT, ShouldDelete}, // 2007 + {EXT_HTTP_IOERROR, ShouldDelete}, // 2008 + 
{EXT_HTTP_BASEERROR, ShouldDelete}, // 2009 + {EXT_HTTP_PARSERROR, ShouldDelete | CanBeFake}, // 2010 + {EXT_HTTP_BAD_CHARSET, ShouldDelete | CheckLinks}, // 2011 + {EXT_HTTP_BAD_LANGUAGE, ShouldDelete | CheckLinks}, // 2012 + {EXT_HTTP_NUMERERROR, ShouldDelete}, // 2013 + {EXT_HTTP_EMPTYDOC, ShouldDelete | CheckLinks}, // 2014 + {EXT_HTTP_HUGEDOC, ShouldDelete}, // 2015 + {EXT_HTTP_LINKGARBAGE, ShouldDelete}, // 2016 + {EXT_HTTP_PARSERFAIL, ShouldDelete}, // 2019 + {EXT_HTTP_GZIPERROR, ShouldDelete}, // 2020 + {EXT_HTTP_MANUAL_DELETE_URL, ShouldDelete}, // 2022 + {EXT_HTTP_CUSTOM_PARTIAL_CONTENT, ShouldReindex}, // 2023 + {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete}, // 2024 + {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025 + {0, 0}}; + +static ui16* prepare_flags(http_flag* arg) { + static ui16 flags[EXT_HTTP_CODE_MAX]; + http_flag* ptr; + size_t i; + + // устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше + for (i = 0; i < EXT_HTTP_CODE_MAX; ++i) + flags[i] = CrazyServer; + + // устанавливаем флаги для перечисленных кодов + for (ptr = arg; ptr->http; ++ptr) + flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; + + // для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их + // всем кодам не перечисленным в таблице выше + for (size_t group = 0; group < 1000; group += 100) + for (size_t j = group + 1; j < group + 100; ++j) + flags[j] = flags[group]; + + // предыдущий цикл затер некоторые флаги перечисленные в таблице выше + // восстанавливаем их + for (ptr = arg; ptr->http; ++ptr) + flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag; + + return flags; +} + +ui16* http2status = prepare_flags(HTTP_FLAG); + +TStringBuf ExtHttpCodeStr(int code) noexcept { + if (code < HTTP_CODE_MAX) { + return HttpCodeStr(code); + } + switch (code) { + case HTTP_BAD_RESPONSE_HEADER: + return TStringBuf("Bad response header"); + case HTTP_CONNECTION_LOST: + return TStringBuf("Connection lost"); + 
case HTTP_BODY_TOO_LARGE: + return TStringBuf("Body too large"); + case HTTP_ROBOTS_TXT_DISALLOW: + return TStringBuf("robots.txt disallow"); + case HTTP_BAD_URL: + return TStringBuf("Bad url"); + case HTTP_BAD_MIME: + return TStringBuf("Bad mime type"); + case HTTP_DNS_FAILURE: + return TStringBuf("Dns failure"); + case HTTP_BAD_STATUS_CODE: + return TStringBuf("Bad status code"); + case HTTP_BAD_HEADER_STRING: + return TStringBuf("Bad header string"); + case HTTP_BAD_CHUNK: + return TStringBuf("Bad chunk"); + case HTTP_CONNECT_FAILED: + return TStringBuf("Connect failed"); + case HTTP_FILTER_DISALLOW: + return TStringBuf("Filter disallow"); + case HTTP_LOCAL_EIO: + return TStringBuf("Local eio"); + case HTTP_BAD_CONTENT_LENGTH: + return TStringBuf("Bad content length"); + case HTTP_BAD_ENCODING: + return TStringBuf("Bad encoding"); + case HTTP_LENGTH_UNKNOWN: + return TStringBuf("Length unknown"); + case HTTP_HEADER_EOF: + return TStringBuf("Header EOF"); + case HTTP_MESSAGE_EOF: + return TStringBuf("Message EOF"); + case HTTP_CHUNK_EOF: + return TStringBuf("Chunk EOF"); + case HTTP_PAST_EOF: + return TStringBuf("Past EOF"); + case HTTP_HEADER_TOO_LARGE: + return TStringBuf("Header is too large"); + case HTTP_URL_TOO_LARGE: + return TStringBuf("Url is too large"); + case HTTP_INTERRUPTED: + return TStringBuf("Interrupted"); + case HTTP_CUSTOM_NOT_MODIFIED: + return TStringBuf("Signature detector thinks that doc is not modified"); + case HTTP_BAD_CONTENT_ENCODING: + return TStringBuf("Bad content encoding"); + case HTTP_NO_RESOURCES: + return TStringBuf("No resources"); + case HTTP_FETCHER_SHUTDOWN: + return TStringBuf("Fetcher shutdown"); + case HTTP_CHUNK_TOO_LARGE: + return TStringBuf("Chunk size is too big"); + case HTTP_SERVER_BUSY: + return TStringBuf("Server is busy"); + case HTTP_SERVICE_UNKNOWN: + return TStringBuf("Service is unknown"); + case HTTP_PROXY_UNKNOWN: + return TStringBuf("Zora: unknown error"); + case HTTP_PROXY_REQUEST_TIME_OUT: + return 
TStringBuf("Zora: request time out"); + case HTTP_PROXY_INTERNAL_ERROR: + return TStringBuf("Zora: internal server error"); + case HTTP_PROXY_CONNECT_FAILED: + return TStringBuf("Spider proxy connect failed"); + case HTTP_PROXY_CONNECTION_LOST: + return TStringBuf("Spider proxy connection lost"); + case HTTP_PROXY_NO_PROXY: + return TStringBuf("Spider proxy no proxy alive in region"); + case HTTP_PROXY_ERROR: + return TStringBuf("Spider proxy returned custom error"); + case HTTP_SSL_ERROR: + return TStringBuf("Ssl library returned error"); + case HTTP_CACHED_COPY_NOT_FOUND: + return TStringBuf("Cached copy for the url is not available"); + case HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING: + return TStringBuf("Timed out while bytes receiving"); + + // TODO: messages for >2000 codes + + default: + return TStringBuf("Unknown HTTP code"); + } +} diff --git a/library/cpp/http/fetch/exthttpcodes.h b/library/cpp/http/fetch/exthttpcodes.h new file mode 100644 index 0000000000..6b525052cd --- /dev/null +++ b/library/cpp/http/fetch/exthttpcodes.h @@ -0,0 +1,141 @@ +#pragma once + +#include <util/system/defaults.h> +#include <library/cpp/http/misc/httpcodes.h> + +enum ExtHttpCodes { + // Custom + HTTP_EXTENDED = 1000, + HTTP_BAD_RESPONSE_HEADER = 1000, + HTTP_CONNECTION_LOST = 1001, + HTTP_BODY_TOO_LARGE = 1002, + HTTP_ROBOTS_TXT_DISALLOW = 1003, + HTTP_BAD_URL = 1004, + HTTP_BAD_MIME = 1005, + HTTP_DNS_FAILURE = 1006, + HTTP_BAD_STATUS_CODE = 1007, + HTTP_BAD_HEADER_STRING = 1008, + HTTP_BAD_CHUNK = 1009, + HTTP_CONNECT_FAILED = 1010, + HTTP_FILTER_DISALLOW = 1011, + HTTP_LOCAL_EIO = 1012, + HTTP_BAD_CONTENT_LENGTH = 1013, + HTTP_BAD_ENCODING = 1014, + HTTP_LENGTH_UNKNOWN = 1015, + HTTP_HEADER_EOF = 1016, + HTTP_MESSAGE_EOF = 1017, + HTTP_CHUNK_EOF = 1018, + HTTP_PAST_EOF = 1019, + HTTP_HEADER_TOO_LARGE = 1020, + HTTP_URL_TOO_LARGE = 1021, + HTTP_INTERRUPTED = 1022, + HTTP_CUSTOM_NOT_MODIFIED = 1023, + HTTP_BAD_CONTENT_ENCODING = 1024, + HTTP_NO_RESOURCES = 1025, + 
HTTP_FETCHER_SHUTDOWN = 1026, + HTTP_CHUNK_TOO_LARGE = 1027, + HTTP_SERVER_BUSY = 1028, + HTTP_SERVICE_UNKNOWN = 1029, + HTTP_PROXY_UNKNOWN = 1030, + HTTP_PROXY_REQUEST_TIME_OUT = 1031, + HTTP_PROXY_INTERNAL_ERROR = 1032, + HTTP_PROXY_CONNECT_FAILED = 1033, + HTTP_PROXY_CONNECTION_LOST = 1034, + HTTP_PROXY_NO_PROXY = 1035, + HTTP_PROXY_ERROR = 1036, + HTTP_SSL_ERROR = 1037, + HTTP_CACHED_COPY_NOT_FOUND = 1038, + HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING = 1039, + HTTP_FETCHER_BAD_RESPONSE = 1040, + HTTP_FETCHER_MB_ERROR = 1041, + HTTP_SSL_CERT_ERROR = 1042, + HTTP_PROXY_REQUEST_CANCELED = 1051, + + // Custom (replace HTTP 200/304) + EXT_HTTP_EXT_SUCCESS_BEGIN = 2000, // to check if code variable is in success interval + EXT_HTTP_MIRRMOVE = 2000, + EXT_HTTP_MANUAL_DELETE = 2001, + EXT_HTTP_NOTUSED2 = 2002, + EXT_HTTP_NOTUSED3 = 2003, + EXT_HTTP_REFRESH = 2004, + EXT_HTTP_NOINDEX = 2005, + EXT_HTTP_BADCODES = 2006, + EXT_HTTP_SITESTAT = 2007, + EXT_HTTP_IOERROR = 2008, + EXT_HTTP_BASEERROR = 2009, + EXT_HTTP_PARSERROR = 2010, + EXT_HTTP_BAD_CHARSET = 2011, + EXT_HTTP_BAD_LANGUAGE = 2012, + EXT_HTTP_NUMERERROR = 2013, + EXT_HTTP_EMPTYDOC = 2014, + EXT_HTTP_HUGEDOC = 2015, + EXT_HTTP_LINKGARBAGE = 2016, + EXT_HTTP_EXDUPLICATE = 2017, + EXT_HTTP_FILTERED = 2018, + EXT_HTTP_PARSERFAIL = 2019, // parser crashed (in this case image spider will redownload such document) + EXT_HTTP_GZIPERROR = 2020, + EXT_HTTP_CLEANPARAM = 2021, + EXT_HTTP_MANUAL_DELETE_URL = 2022, + EXT_HTTP_CUSTOM_PARTIAL_CONTENT = 2023, + EXT_HTTP_EMPTY_RESPONSE = 2024, + EXT_HTTP_REL_CANONICAL = 2025, + + EXT_HTTP_EXT_SUCCESS_END = 3000, // to check if code variable is in success interval + EXT_HTTP_HOSTFILTER = 3001, + EXT_HTTP_URLFILTER = 3002, + EXT_HTTP_SUFFIXFILTER = 3003, + EXT_HTTP_DOMAINFILTER = 3004, + EXT_HTTP_EXTDOMAINFILTER = 3005, + EXT_HTTP_PORTFILTER = 3006, + EXT_HTTP_MIRROR = 3007, + EXT_HTTP_DEEPDIR = 3008, + EXT_HTTP_DUPDIRS = 3009, + EXT_HTTP_REGEXP = 3010, + EXT_HTTP_OLDDELETED = 3012, + 
EXT_HTTP_PENALTY = 3013, + EXT_HTTP_POLICY = 3015, + EXT_HTTP_TOOOLD = 3016, + EXT_HTTP_GARBAGE = 3017, + EXT_HTTP_FOREIGN = 3018, + EXT_HTTP_EXT_REGEXP = 3019, + EXT_HTTP_HOPS = 3020, + EXT_HTTP_SELRANK = 3021, + EXT_HTTP_NOLINKS = 3022, + EXT_HTTP_WRONGMULTILANG = 3023, + EXT_HTTP_SOFTMIRRORS = 3024, + EXT_HTTP_BIGLEVEL = 3025, + + // fast robot codes + + EXT_HTTP_FASTHOPS = 4000, + EXT_HTTP_NODOC = 4001, + + EXT_HTTP_MAX +}; + +enum HttpFlags { + // connection + ShouldDisconnect = 1, + ShouldRetry = 2, + // UNUSED 4 + + // indexer + ShouldReindex = 8, + ShouldDelete = 16, + CheckLocation = 32, + CheckLinks = 64, + MarkSuspect = 128, + // UNUSED 256 + // UNUSED 512 + MoveRedir = 1024, + CanBeFake = 2048, +}; + +const size_t EXT_HTTP_CODE_MAX = 1 << 12; + +static inline int Http2Status(int code) { + extern ui16* http2status; + return http2status[code & (EXT_HTTP_CODE_MAX - 1)]; +} + +TStringBuf ExtHttpCodeStr(int code) noexcept; diff --git a/library/cpp/http/fetch/http_digest.cpp b/library/cpp/http/fetch/http_digest.cpp new file mode 100644 index 0000000000..1eaa02b7f2 --- /dev/null +++ b/library/cpp/http/fetch/http_digest.cpp @@ -0,0 +1,206 @@ +#include "http_digest.h" + +#include <library/cpp/digest/md5/md5.h> +#include <util/stream/output.h> +#include <util/stream/str.h> + +/************************************************************/ +/************************************************************/ +static const char* WWW_PREFIX = "Authorization: Digest "; + +/************************************************************/ +httpDigestHandler::httpDigestHandler() + : User_(nullptr) + , Password_(nullptr) + , Nonce_(nullptr) + , NonceCount_(0) + , HeaderInstruction_(nullptr) +{ +} + +/************************************************************/ +httpDigestHandler::~httpDigestHandler() { + clear(); +} + +/************************************************************/ +void httpDigestHandler::clear() { + free(Nonce_); + free(HeaderInstruction_); + User_ = Password_ = 
nullptr; + Nonce_ = HeaderInstruction_ = nullptr; + NonceCount_ = 0; +} + +/************************************************************/ +void httpDigestHandler::setAuthorization(const char* user, const char* password) { + clear(); + if (user && password) { + User_ = user; + Password_ = password; + } +} + +/************************************************************/ +const char* httpDigestHandler::getHeaderInstruction() const { + return HeaderInstruction_; +} + +/************************************************************/ +void httpDigestHandler::generateCNonce(char* outCNonce) { + if (!*outCNonce) + sprintf(outCNonce, "%ld", (long)time(nullptr)); +} + +/************************************************************/ +inline void addMD5(MD5& ctx, const char* value) { + ctx.Update((const unsigned char*)(value), strlen(value)); +} + +inline void addMD5(MD5& ctx, const char* value, int len) { + ctx.Update((const unsigned char*)(value), len); +} + +inline void addMD5Sep(MD5& ctx) { + addMD5(ctx, ":", 1); +} + +/************************************************************/ +/* calculate H(A1) as per spec */ +void httpDigestHandler::digestCalcHA1(const THttpAuthHeader& hd, + char* outSessionKey, + char* outCNonce) { + MD5 ctx; + ctx.Init(); + addMD5(ctx, User_); + addMD5Sep(ctx); + addMD5(ctx, hd.realm); + addMD5Sep(ctx); + addMD5(ctx, Password_); + + if (hd.algorithm == 1) { //MD5-sess + unsigned char digest[16]; + ctx.Final(digest); + + generateCNonce(outCNonce); + + ctx.Init(); + ctx.Update(digest, 16); + addMD5Sep(ctx); + addMD5(ctx, hd.nonce); + addMD5Sep(ctx); + addMD5(ctx, outCNonce); + ctx.End(outSessionKey); + } + + ctx.End(outSessionKey); +}; + +/************************************************************/ +/* calculate request-digest/response-digest as per HTTP Digest spec */ +void httpDigestHandler::digestCalcResponse(const THttpAuthHeader& hd, + const char* path, + const char* method, + const char* nonceCount, + char* outResponse, + char* outCNonce) { + 
char HA1[33]; + digestCalcHA1(hd, HA1, outCNonce); + + char HA2[33]; + MD5 ctx; + ctx.Init(); + addMD5(ctx, method); + addMD5Sep(ctx); + addMD5(ctx, path); + //ignore auth-int + ctx.End(HA2); + + ctx.Init(); + addMD5(ctx, HA1, 32); + addMD5Sep(ctx); + addMD5(ctx, Nonce_); + addMD5Sep(ctx); + + if (hd.qop_auth) { + if (!*outCNonce) + generateCNonce(outCNonce); + + addMD5(ctx, nonceCount, 8); + addMD5Sep(ctx); + addMD5(ctx, outCNonce); + addMD5Sep(ctx); + addMD5(ctx, "auth", 4); + addMD5Sep(ctx); + } + addMD5(ctx, HA2, 32); + ctx.End(outResponse); +} + +/************************************************************/ +bool httpDigestHandler::processHeader(const THttpAuthHeader* header, + const char* path, + const char* method, + const char* cnonce) { + if (!User_ || !header || !header->use_auth || !header->realm || !header->nonce) + return false; + + if (Nonce_) { + if (strcmp(Nonce_, header->nonce)) { + free(Nonce_); + Nonce_ = nullptr; + NonceCount_ = 0; + } + } + if (!Nonce_) { + Nonce_ = strdup(header->nonce); + NonceCount_ = 0; + } + free(HeaderInstruction_); + HeaderInstruction_ = nullptr; + NonceCount_++; + + char nonceCount[20]; + sprintf(nonceCount, "%08d", NonceCount_); + + char CNonce[50]; + if (cnonce) + strcpy(CNonce, cnonce); + else + CNonce[0] = 0; + + char response[33]; + digestCalcResponse(*header, path, method, nonceCount, response, CNonce); + + //digest-response = 1#( username | realm | nonce | digest-uri + // | response | [ algorithm ] | [cnonce] | + // [opaque] | [message-qop] | + // [nonce-count] | [auth-param] ) + + TStringStream out; + out << WWW_PREFIX << "username=\"" << User_ << "\""; + out << ", realm=\"" << header->realm << "\""; + out << ", nonce=\"" << header->nonce << "\""; + out << ", uri=\"" << path << "\""; + if (header->algorithm == 1) + out << ", algorithm=MD5-sess"; + else + out << ", algorithm=MD5"; + if (header->qop_auth) + out << ", qop=auth"; + out << ", nc=" << nonceCount; + if (CNonce[0]) + out << ", cnonce=\"" << CNonce << 
"\""; + out << ", response=\"" << response << "\""; + if (header->opaque) + out << ", opaque=\"" << header->opaque << "\""; + out << "\r\n"; + + TString s_out = out.Str(); + HeaderInstruction_ = strdup(s_out.c_str()); + + return true; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/http_digest.h b/library/cpp/http/fetch/http_digest.h new file mode 100644 index 0000000000..3b1872d70b --- /dev/null +++ b/library/cpp/http/fetch/http_digest.h @@ -0,0 +1,47 @@ +#pragma once + +#include "httpheader.h" + +#include <util/system/compat.h> +#include <library/cpp/http/misc/httpcodes.h> + +class httpDigestHandler { +protected: + const char* User_; + const char* Password_; + char* Nonce_; + int NonceCount_; + char* HeaderInstruction_; + + void clear(); + + void generateCNonce(char* outCNonce); + + void digestCalcHA1(const THttpAuthHeader& hd, + char* outSessionKey, + char* outCNonce); + + void digestCalcResponse(const THttpAuthHeader& hd, + const char* method, + const char* path, + const char* nonceCount, + char* outResponse, + char* outCNonce); + +public: + httpDigestHandler(); + ~httpDigestHandler(); + + void setAuthorization(const char* user, + const char* password); + bool processHeader(const THttpAuthHeader* header, + const char* path, + const char* method, + const char* cnonce = nullptr); + + bool empty() const { + return (!User_); + } + + const char* getHeaderInstruction() const; +}; diff --git a/library/cpp/http/fetch/http_socket.cpp b/library/cpp/http/fetch/http_socket.cpp new file mode 100644 index 0000000000..1524ef04a8 --- /dev/null +++ b/library/cpp/http/fetch/http_socket.cpp @@ -0,0 +1,206 @@ +#include "httpload.h" +#include "http_digest.h" + +/************************************************************/ + +#ifdef USE_GNUTLS + +#include <gcrypt.h> +#include <gnutls/gnutls.h> +#include <util/network/init.h> +#include <util/network/socket.h> 
+#include <util/system/mutex.h> + +/********************************************************/ +// HTTPS handler is used as implementation of +// socketAbstractHandler for work through HTTPS protocol + +class socketSecureHandler: public socketRegularHandler { +protected: + bool IsValid_; + gnutls_session Session_; + gnutls_certificate_credentials Credits_; + +public: + socketSecureHandler(); + virtual ~socketSecureHandler(); + + virtual bool Good(); + virtual int Connect(const TAddrList& addrs, TDuration Timeout); + virtual void Disconnect(); + virtual void shutdown(); + virtual bool send(const char* message, ssize_t messlen); + virtual bool peek(); + virtual ssize_t read(void* buffer, ssize_t buflen); +}; + +/********************************************************/ +/********************************************************/ +static int gcry_pthread_mutex_init(void** priv) { + int err = 0; + + try { + TMutex* lock = new TMutex; + *priv = lock; + } catch (...) { + err = -1; + } + + return err; +} + +static int gcry_pthread_mutex_destroy(void** lock) { + delete static_cast<TMutex*>(*lock); + + return 0; +} + +static int gcry_pthread_mutex_lock(void** lock) { + static_cast<TMutex*>(*lock)->Acquire(); + + return 0; +} + +static int gcry_pthread_mutex_unlock(void** lock) { + static_cast<TMutex*>(*lock)->Release(); + + return 0; +} + +static struct gcry_thread_cbs gcry_threads_pthread = + { + GCRY_THREAD_OPTION_PTHREAD, NULL, + gcry_pthread_mutex_init, gcry_pthread_mutex_destroy, + gcry_pthread_mutex_lock, gcry_pthread_mutex_unlock, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL}; + +/********************************************************/ +struct https_initor { + https_initor() { + gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread); + gnutls_global_init(); + InitNetworkSubSystem(); + } + + ~https_initor() { + gnutls_global_deinit(); + } +}; + +static https_initor _initor; + +/********************************************************/ 
+socketSecureHandler::socketSecureHandler() + : socketRegularHandler() + , IsValid_(false) + , Session_() + , Credits_() +{ +} + +/********************************************************/ +socketSecureHandler::~socketSecureHandler() { + if (IsValid_) + Disconnect(); +} + +/********************************************************/ +bool socketSecureHandler::Good() { + return Socket_.Good() && IsValid_; +} + +/********************************************************/ +int socketSecureHandler::Connect(const TAddrList& addrs, TDuration Timeout) { + IsValid_ = false; + + int ret = socketRegularHandler::Connect(addrs, Timeout); + if (ret) + return ret; + + gnutls_certificate_allocate_credentials(&Credits_); + gnutls_init(&Session_, GNUTLS_CLIENT); + gnutls_set_default_priority(Session_); + gnutls_credentials_set(Session_, GNUTLS_CRD_CERTIFICATE, Credits_); + + SOCKET fd = Socket_; + gnutls_transport_set_ptr(Session_, (gnutls_transport_ptr)fd); + + ret = gnutls_handshake(Session_); + + if (ret < 0) { + fprintf(stderr, "*** Handshake failed\n"); + gnutls_perror(ret); + + gnutls_deinit(Session_); + if (Credits_) { + gnutls_certificate_free_credentials(Credits_); + Credits_ = 0; + } + return 1; + } + + IsValid_ = true; + return !IsValid_; +} + +/********************************************************/ +void socketSecureHandler::Disconnect() { + if (IsValid_) { + gnutls_bye(Session_, GNUTLS_SHUT_RDWR); + IsValid_ = false; + gnutls_deinit(Session_); + } + + if (Credits_) { + gnutls_certificate_free_credentials(Credits_); + Credits_ = 0; + } + + socketRegularHandler::Disconnect(); +} + +/********************************************************/ +void socketSecureHandler::shutdown() { +} + +/********************************************************/ +bool socketSecureHandler::send(const char* message, ssize_t messlen) { + if (!IsValid_) + return false; + ssize_t rv = gnutls_record_send(Session_, message, messlen); + return rv >= 0; +} + 
+/********************************************************/ +bool socketSecureHandler::peek() { + //ssize_t rv = gnutls_record_check_pending(mSession); + //return rv>0; + return true; +} + +/********************************************************/ +ssize_t socketSecureHandler::read(void* buffer, ssize_t buflen) { + if (!IsValid_) + return false; + return gnutls_record_recv(Session_, (char*)buffer, buflen); +} + +#endif + +/************************************************************/ +socketAbstractHandler* socketHandlerFactory::chooseHandler(const THttpURL& url) { + if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTP) + return new socketRegularHandler; + +#ifdef USE_GNUTLS + if (url.IsValidGlobal() && url.GetScheme() == THttpURL::SchemeHTTPS) + return new socketSecureHandler; +#endif + + return nullptr; +} + +/************************************************************/ +socketHandlerFactory socketHandlerFactory::sInstance; +/************************************************************/ diff --git a/library/cpp/http/fetch/httpagent.h b/library/cpp/http/fetch/httpagent.h new file mode 100644 index 0000000000..96475cc05d --- /dev/null +++ b/library/cpp/http/fetch/httpagent.h @@ -0,0 +1,316 @@ +#pragma once + +#include <cstdio> +#include <cstring> +#include <cstdlib> + +#include <library/cpp/uri/http_url.h> +#include <util/datetime/base.h> +#include <util/network/hostip.h> +#include <util/network/ip.h> +#include <util/network/sock.h> +#include <util/generic/scope.h> +#include <util/generic/utility.h> +#include <util/string/cast.h> + +#include "exthttpcodes.h" +#include "sockhandler.h" + +class TIpResolver { +public: + TAddrList Resolve(const char* host, TIpPort port) const { + try { + TAddrList result; + TNetworkAddress na(host, port); + for (auto i = na.Begin(); i != na.End(); ++i) { + const struct addrinfo& ai = *i; + switch (ai.ai_family) { + case AF_INET: + result.push_back(new NAddr::TIPv4Addr(*(sockaddr_in*)ai.ai_addr)); + break; + case 
AF_INET6: + result.push_back(new NAddr::TIPv6Addr(*(sockaddr_in6*)ai.ai_addr)); + break; + } + } + return result; + } catch (const TNetworkResolutionError&) { + } + return TAddrList(); + } +}; + +namespace NResolverHelpers { + Y_HAS_MEMBER(Resolve); + + template <typename TResolver> + std::enable_if_t<TClassHasResolve<TResolver>::value, TAddrList> Resolve(const TResolver& r, const char* host, TIpPort port) { + return r.Resolve(host, port); + } + + template <typename TResolver> + std::enable_if_t<!TClassHasResolve<TResolver>::value, TAddrList> Resolve(const TResolver& r, const char* host, TIpPort port) { + ui32 ip = 0; + if (r.GetHostIP(host, &ip)) { + // error + return TAddrList(); + } + if (!ip) { + return TAddrList(); + } + + return TAddrList::MakeV4Addr(ip, port); + } +} + +template <typename TBase> +class TIpResolverWrapper { +private: + TBase Base; + +public: + TIpResolverWrapper() = default; + + template <typename T> + TIpResolverWrapper(T&& base) + : Base(std::forward(base)) + { + } + + TAddrList Resolve(const char* host, TIpPort port) const { + return NResolverHelpers::Resolve(Base, host, port); + } +}; + +template <class TSocketHandler = TSimpleSocketHandler, class TDnsClient = TIpResolver> +class THttpAgent { +public: + THttpAgent() + : Persistent(0) + , Timeout(TDuration::MicroSeconds(150)) + , Hostheader(nullptr) + , Footer(nullptr) + , AltFooter(nullptr) + , PostData(nullptr) + , PostDataLen(0) + , Method(nullptr) + , MethodLen(0) + , HostheaderLen(0) + { + SetIdentification("YandexSomething/1.0", "webadmin@yandex.ru"); + } + + ~THttpAgent() { + Disconnect(); + free(Hostheader); + free(Footer); + } + + void SetIdentification(const char* user_agent, const char* http_from) { + free(Footer); + size_t len = user_agent ? strlen(user_agent) + 15 : 0; + len += http_from ? 
strlen(http_from) + 9 : 0; + len += 3; + Footer = (char*)malloc(len); + if (user_agent) + strcat(strcat(strcpy(Footer, "User-Agent: "), user_agent), "\r\n"); + if (http_from) + strcat(strcat(strcat(Footer, "From: "), http_from), "\r\n"); + } + + void SetUserAgentFooter(const char* altFooter) { + AltFooter = altFooter; + } + + void SetPostData(const char* postData, size_t postDataLen) { + PostData = postData; + PostDataLen = postDataLen; + } + + void SetMethod(const char* method, size_t methodLen) { + Method = method; + MethodLen = methodLen; + } + + // deprecated + ui32 GetIp() const { + return Addrs.GetV4Addr().first; + } + + int GetScheme() const { + return THttpURL::SchemeHTTP; + } + void SetTimeout(TDuration tim) { + Timeout = tim; + } + + void SetConnectTimeout(TDuration timeout) { + ConnectTimeout = timeout; + } + + int Disconnected() { + return !Persistent || !Socket.Good(); + } + + int SetHost(const char* hostname, TIpPort port) { + Disconnect(); + TAddrList addrs = DnsClient.Resolve(hostname, port); + if (!addrs.size()) { + return 1; + } + + SetHost(hostname, port, addrs); + return 0; + } + + int SetHost(const char* hostname, TIpPort port, const TAddrList& addrs) { + Disconnect(); + Addrs = addrs; + size_t reqHostheaderLen = strlen(hostname) + 20; + if (HostheaderLen < reqHostheaderLen) { + free(Hostheader); + Hostheader = (char*)malloc((HostheaderLen = reqHostheaderLen)); + } + if (port == 80) + sprintf(Hostheader, "Host: %s\r\n", hostname); + else + sprintf(Hostheader, "Host: %s:%u\r\n", hostname, port); + pHostBeg = strchr(Hostheader, ' ') + 1; + pHostEnd = strchr(pHostBeg, '\r'); + // convert hostname to lower case since some web server don't like + // uppper case (Task ROBOT-562) + for (char* p = pHostBeg; p < pHostEnd; p++) + *p = tolower(*p); + return 0; + } + + // deprecated v4-only + int SetHost(const char* hostname, TIpPort port, ui32 ip) { + return SetHost(hostname, port, TAddrList::MakeV4Addr(ip, port)); + } + + void SetHostHeader(const char* 
host) { + size_t reqHostheaderLen = strlen(host) + 20; + if (HostheaderLen < reqHostheaderLen) { + delete[] Hostheader; + Hostheader = new char[(HostheaderLen = reqHostheaderLen)]; + } + sprintf(Hostheader, "Host: %s\r\n", host); + } + + void SetSocket(SOCKET fd) { + Socket.SetSocket(fd); + } + + SOCKET PickOutSocket() { + return Socket.PickOutSocket(); + } + + void Disconnect() { + Socket.Disconnect(); + } + + ssize_t read(void* buffer, size_t buflen) { + return Socket.read(buffer, buflen); + } + + int RequestGet(const char* url, const char* const* headers, int persistent = 1, bool head_request = false) { + if (!Addrs.size()) + return HTTP_DNS_FAILURE; + char message[MessageMax]; + ssize_t messlen = 0; + if (Method) { + strncpy(message, Method, MethodLen); + message[MethodLen] = ' '; + messlen = MethodLen + 1; + } else if (PostData) { + strcpy(message, "POST "); + messlen = 5; + } else if (head_request) { + strcpy(message, "HEAD "); + messlen = 5; + } else { + strcpy(message, "GET "); + messlen = 4; + } +#define _AppendMessage(mes) messlen += Min(MessageMax - messlen, \ + (ssize_t)strlcpy(message + messlen, (mes), MessageMax - messlen)) + _AppendMessage(url); + _AppendMessage(" HTTP/1.1\r\n"); + if (*url == '/') //if not then Host is a proxy + _AppendMessage(Hostheader); + _AppendMessage("Connection: "); + _AppendMessage(persistent ? "Keep-Alive\r\n" : "Close\r\n"); + while (headers && *headers) + _AppendMessage(*headers++); + if (AltFooter) + _AppendMessage(AltFooter); + else + _AppendMessage(Footer); + _AppendMessage("\r\n"); +#undef _AppendMessage + if (messlen >= MessageMax) + return HTTP_HEADER_TOO_LARGE; + + if (!Persistent) + Disconnect(); + Persistent = persistent; + int connected = Socket.Good(); + for (int attempt = !connected; attempt < 2; attempt++) { + const auto connectTimeout = ConnectTimeout ? 
ConnectTimeout : Timeout; + if (!Socket.Good() && Socket.Connect(Addrs, connectTimeout)) + return HTTP_CONNECT_FAILED; + + int sendOk = Socket.send(message, messlen); + if (sendOk && PostData && PostDataLen) + sendOk = Socket.send(PostData, PostDataLen); + if (!sendOk) { + // Preserve errno across Disconnect() and try the next attempt. + int err = errno; + Disconnect(); + errno = err; + continue; + } + + if (!Socket.peek()) { + int err = errno; + Disconnect(); + if (err == EINTR) { + errno = err; + return HTTP_INTERRUPTED; + } + } else { + if (!persistent) + Socket.shutdown(); + return 0; + } + } + return connected ? HTTP_CONNECTION_LOST : HTTP_CONNECT_FAILED; + } + +protected: + TSocketHandler Socket; + TIpResolverWrapper<TDnsClient> DnsClient; + TAddrList Addrs; + int Persistent; + TDuration Timeout; + TDuration ConnectTimeout; + char *Hostheader, *Footer, *pHostBeg, *pHostEnd; + const char* AltFooter; // alternative footer can be set by the caller + const char* PostData; + size_t PostDataLen; + const char* Method; + size_t MethodLen; + size_t HostheaderLen; // allocated size of Hostheader; size_t so long host names are not truncated + static const ssize_t MessageMax = 32768; +}; + +struct TNoTimer { + inline void OnBeforeSend() { + } + inline void OnAfterSend() { + } + inline void OnBeforeRecv() { + } + inline void OnAfterRecv() { + } +}; diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h new file mode 100644 index 0000000000..7fc251afd2 --- /dev/null +++ b/library/cpp/http/fetch/httpfetcher.h @@ -0,0 +1,171 @@ +#pragma once + +#ifdef _MSC_VER +#include <io.h> +#endif + +#include <library/cpp/http/misc/httpdate.h> + +#include "httpagent.h" +#include "httpparser.h" + +struct TFakeBackup { + int Write(void* /*buf*/, size_t /*size*/) { + return 0; + } +}; + +template <size_t bufsize = 5000> +struct TFakeAlloc { + void Shrink(void* /*buf*/, size_t /*size*/) { + } + void* Grab(size_t /*min*/, size_t* real) { + *real = bufsize; + return buf; + } + char buf[bufsize]; +}; + +template <typename TAlloc = TFakeAlloc<>, + typename TCheck = TFakeCheck<>, + typename 
TWriter = TFakeBackup, + typename TAgent = THttpAgent<>> +class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent { +public: + static const size_t TCP_MIN = 1500; + static int TerminateNow; + + THttpFetcher() + : THttpParser<TCheck>() + , TAlloc() + , TWriter() + , TAgent() + { + } + + virtual ~THttpFetcher() { + } + + int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) { + int ret = 0; + int fetcherr = 0; + + THttpParser<TCheck>::Init(header, head_request); + const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme()); + size_t schemelen = strlen(scheme); + if (*path == '/') { + header->base = TStringBuf(scheme, schemelen); + header->base += TStringBuf("://", 3); + header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg); + header->base += path; + } else { + if (strlen(path) >= FETCHER_URL_MAX) { + header->error = HTTP_URL_TOO_LARGE; + return 0; + } + header->base = path; + } + + if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) { + header->error = (i16)ret; + return 0; + } + + bool inheader = 1; + void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr; + ssize_t got; + size_t buffree = 0, bufsize = 0, buflen = 0; + size_t maxsize = TCheck::GetMaxHeaderSize(); + do { + if (buffree < TCP_MIN) { + if (buf) { + TAlloc::Shrink(buf, buflen - buffree); + if (TWriter::Write(buf, buflen - buffree) < 0) { + buf = nullptr; + ret = EIO; + break; + } + } + if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) { + ret = ENOMEM; + break; + } + bufptr = buf; + buffree = buflen; + } + if ((got = TAgent::read(bufptr, buffree)) < 0) { + fetcherr = errno; + if (errno == EINTR) + header->error = HTTP_INTERRUPTED; + else if (errno == ETIMEDOUT) + header->error = HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING; + else + header->error = HTTP_CONNECTION_LOST; + + break; + } + + parsebuf = bufptr; + bufptr = (char*)bufptr + 
got; + bufsize += got; + buffree -= got; + + THttpParser<TCheck>::Parse(parsebuf, got); + + if (header->error) + break; //if ANY error ocurred we will stop download that file or will have unprognosed stream position until MAX size reached + + if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) { + inheader = 0; + if (TCheck::Check(header)) + break; + if (header->header_size > (long)maxsize) { + header->error = HTTP_HEADER_TOO_LARGE; + break; + } + } + if (!inheader) { + maxsize = TCheck::GetMaxBodySize(header); + } + if (header->http_status >= HTTP_EXTENDED) + break; + if (bufsize > maxsize) { + header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE; + break; + } + if (TerminateNow) { + header->error = HTTP_INTERRUPTED; + break; + } + } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof); + + i64 Adjustment = 0; + if (!header->error) { + if (header->transfer_chunked) { + Adjustment = header->header_size + header->entity_size - bufsize - 1; + } else if (header->content_length >= 0) { + Adjustment = header->header_size + header->content_length - bufsize; + } + if (Adjustment > 0) + Adjustment = 0; + } + + if (buf) { + TAlloc::Shrink(buf, buflen - buffree + Adjustment); + + if (TWriter::Write(buf, buflen - buffree) < 0) + ret = EIO; + } + TCheck::CheckEndDoc(header); + if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) { + TAgent::Disconnect(); + if (!fetcherr) + fetcherr = errno; + } + errno = fetcherr; + return ret; + } +}; + +template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent> +int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0; diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h new file mode 100644 index 0000000000..c4abdcd0d2 --- /dev/null +++ b/library/cpp/http/fetch/httpfsm.h @@ -0,0 +1,104 @@ +#pragma once + +#include "httpheader.h" + +#include <util/system/maxlen.h> +#include 
<util/datetime/parser.h> + +#include <time.h> + +struct THttpHeaderParser { + static constexpr int ErrFirstlineTypeMismatch = -3; + static constexpr int ErrHeader = -2; + static constexpr int Err = -1; + static constexpr int Final = 0; + static constexpr int NeedMore = 1; + static constexpr int Accepted = 2; + + int Execute(const void* inBuf, size_t len) { + return execute((unsigned char*)inBuf, (int)len); + } + + int Execute(TStringBuf str) { + return Execute(str.data(), str.size()); + } + + int Init(THttpHeader* h) { + int ret = Init((THttpBaseHeader*)(h)); + hd = h; + hd->Init(); + hreflangpos = hd->hreflangs; + hreflangspace = HREFLANG_MAX; + return ret; + } + + int Init(THttpAuthHeader* h) { + int ret = Init((THttpHeader*)(h)); + auth_hd = h; + return ret; + } + int Init(THttpRequestHeader* h) { + int ret = Init((THttpBaseHeader*)(h)); + request_hd = h; + request_hd->Init(); + return ret; + } + + THttpHeader* hd; + long I; + int Dc; + TDateTimeFieldsDeprecated DateTimeFields; + char buf[FETCHER_URL_MAX]; + size_t buflen; + char* lastchar; + + const unsigned char* langstart; + size_t langlen; + + char* hreflangpos; + size_t hreflangspace; + + bool AcceptingXRobots; + + THttpAuthHeader* auth_hd; + THttpRequestHeader* request_hd; + +private: + THttpBaseHeader* base_hd; + int cs; + +private: + int Init(THttpBaseHeader* header) { + base_hd = header; + auth_hd = nullptr; + request_hd = nullptr; + hd = nullptr; + init(); + return 0; + } + + int execute(unsigned char* inBuf, int len); + void init(); +}; + +struct THttpChunkParser { + int Execute(const void* inBuf, int len) { + return execute((unsigned char*)inBuf, len); + } + + int Init() { + init(); + return 0; + } + + int chunk_length; + char* lastchar; + long I; + int Dc; + i64 cnt64; + +private: + int cs; + int execute(unsigned char* inBuf, int len); + void init(); +}; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 new file mode 100644 index 0000000000..eab0328b18 --- 
/dev/null +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -0,0 +1,684 @@ +#include <stdio.h> +#include <time.h> + +#include <library/cpp/charset/doccodes.h> +#include <library/cpp/charset/codepage.h> +#include <library/cpp/http/misc/httpcodes.h> +#include <util/datetime/base.h> +#include <util/generic/ylimits.h> +#include <algorithm> // max + +#include <library/cpp/http/fetch/httpheader.h> +#include <library/cpp/http/fetch/httpfsm.h> + +#ifdef _MSC_VER +#pragma warning(disable: 4702) // unreachable code +#endif + +#define c(i) I = i; +#define m(i) I = std::max(I, (long)i); + +static inline int X(unsigned char c) { + return (c >= 'A' ? ((c & 0xdf) - 'A' + 10) : (c - '0')); +} + +template <typename x> +static inline void guard(x &val) { + val = (val >= -1) ? -4 - val : -2; // f(-2) = -2 +} + +template <typename x> +static inline void setguarded(x &val, long cnt) { + val = (val == -4 - -1 || cnt == -4 -val) ? cnt : -2; +} + +//////////////////////////////////////////////////////////////////// +/// HTTP PARSER +//////////////////////////////////////////////////////////////////// + +%%{ +machine http_header_parser; + +include HttpDateTimeParser "../../../../util/datetime/parser.rl6"; + +alphtype unsigned char; + +################# 2.2 Basic Rules ################# +eol = '\r'? '\n'; +ws = [ \t]; +lw = '\r'? '\n'? 
ws; +separator = [()<>@,;:\\"/\[\]?={}]; +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..126 | 128..255; +any_text_char = any - [\r\n]; + +lws = lw*; +eoh = lws eol; +token = token_char+; +ex_token = (token_char | ws)* token_char; +text = (text_char | lw)*; +any_text = (any_text_char | lw)*; +def = lws ':' lws; + +action clear_buf { buflen = 0; } +action update_buf { if (buflen < sizeof(buf)) buf[buflen++] = fc; } + +################################################### +############ response status line ################# +action set_minor { base_hd->http_minor = I; } +action set_status { + if (hd) { + hd->http_status = I; + } + if (request_hd) { + return -3; + } +} + +status_code = int3; +http_major = int; +http_minor = int; +reason_phrase = ws+ text_char*; +http_version = "http/"i http_major '.' http_minor %set_minor; +response_status_line = http_version ws+ status_code reason_phrase? eol %set_status; + +############ request status line ################# +action set_request_uri { + if (request_hd && buflen < FETCHER_URL_MAX) { + if (!request_hd->request_uri.empty()) { + return -2; + } + request_hd->request_uri =TStringBuf(buf, buflen); + } +} +action set_http_method { + if (request_hd) { + request_hd->http_method = I; + } + if (hd) { + return -3; + } +} + +http_extension_method = token; +http_method = ("options"i %{c(0)} @1 + | "get"i %{c(1)} @1 + | "head"i %{c(2)} @1 + | "post"i %{c(3)} @1 + | "put"i %{c(4)} @1 + | "delete"i %{c(5)} @1 + | "trace"i %{c(6)} @1 + | "connect"i %{c(7)} @1 + | http_extension_method %{c(8)} $0) + %set_http_method; +request_uri = (token_char | separator)+ >clear_buf $update_buf + %set_request_uri; +request_status_line = http_method ws+ request_uri ws+ http_version eoh; + +################# connection ###################### +action beg_connection { guard(base_hd->connection_closed); I = -1; } +action set_connection { setguarded(base_hd->connection_closed, I); } 
+ +c_token = "close"i %{m(1)} + | "keep-alive"i %{m(0)}; +c_tokenlist = c_token (lws ',' lws c_token)?; +connection = "connection"i def %beg_connection c_tokenlist eoh %set_connection; + +################# content-encoding ################ +action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; } +action set_content_encoding { base_hd->compression_method = + ((base_hd->compression_method == HTTP_COMPRESSION_UNSET || + base_hd->compression_method == I) ? + I : (int)HTTP_COMPRESSION_ERROR); } + +ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} + | "gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "x-gzip"i %{c(HTTP_COMPRESSION_GZIP)} + | "deflate"i %{c(HTTP_COMPRESSION_DEFLATE)} + | "compress"i %{c(HTTP_COMPRESSION_COMPRESS)} + | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)}; +content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding; + +################# transfer-encoding ############### +action beg_encoding { guard(base_hd->transfer_chunked); } +action set_encoding { setguarded(base_hd->transfer_chunked, I); } + +e_tokenlist = "identity"i %{c(0)} + | "chunked"i %{c(1)}; +transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding; + +################# content-length ################## +action beg_content_length { guard(base_hd->content_length); } +action set_content_length { setguarded(base_hd->content_length, I); } + +content_length = "content-length"i def %beg_content_length int eoh %set_content_length; + +################# content-range ################### +action beg_content_range_start { guard(base_hd->content_range_start); I = -1; } +action set_content_range_start { setguarded(base_hd->content_range_start, I); } +action beg_content_range_end { guard(base_hd->content_range_end); I = -1; } +action set_content_range_end { setguarded(base_hd->content_range_end, I); } +action beg_content_range_el { guard(base_hd->content_range_entity_length); I = -1; } +action set_content_range_el { 
setguarded(base_hd->content_range_entity_length, I); } + +content_range = "content-range"i def "bytes"i sp %beg_content_range_start int '-' %set_content_range_start + %beg_content_range_end int '/' %set_content_range_end + %beg_content_range_el int eoh %set_content_range_el; + +################# accept-ranges ################### +action beg_accept_ranges { + if (hd) { + guard(hd->accept_ranges); + I = -1; + } +} +action set_accept_ranges { if (hd) setguarded(hd->accept_ranges, I); } + +ar_tokenlist = "bytes"i %{c(1)} + | "none"i %{c(0)}; +accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges; + +################# content-type #################### +action beg_mime { guard(base_hd->mime_type); } +action set_mime { setguarded(base_hd->mime_type, I); } +action set_charset { + if (buflen < FETCHER_URL_MAX) { + buf[buflen++] = 0; + base_hd->charset = EncodingHintByName((const char*)buf); + } +} + +mime_type = "text/plain"i %{c(MIME_TEXT)} + | "text/html"i %{c(MIME_HTML)} + | "application/pdf"i %{c(MIME_PDF)} + | "application/rtf"i %{c(MIME_RTF)} + | "text/rtf"i %{c(MIME_RTF)} + | "application/msword"i %{c(MIME_DOC)} + | "audio/mpeg"i %{c(MIME_MPEG)} + | "text/xml"i %{c(MIME_XML)} + | "application/xml"i %{c(MIME_XML)} + | "application/rss+xml"i %{c(MIME_RSS)} + | "application/rdf+xml"i %{c(MIME_RSS)} + | "application/atom+xml"i %{c(MIME_RSS)} + | "text/vnd.wap.wml"i %{c(MIME_WML)} + | "application/x-shockwave-flash"i %{c(MIME_SWF)} + | "application/vnd.ms-excel"i %{c(MIME_XLS)} + | "application/vnd.ms-powerpoint"i %{c(MIME_PPT)} + | "image/jpeg"i %{c(MIME_IMAGE_JPG)} + | "image/jpg"i %{c(MIME_IMAGE_JPG)} + | "image/pjpeg"i %{c(MIME_IMAGE_PJPG)} + | "image/png"i %{c(MIME_IMAGE_PNG)} + | "image/gif"i %{c(MIME_IMAGE_GIF)} + | "application/xhtml+xml"i %{c(MIME_XHTMLXML)} + | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"i %{c(MIME_DOCX)} + | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"i 
%{c(MIME_XLSX)} + | "application/vnd.openxmlformats-officedocument.presentationml.presentation"i %{c(MIME_PPTX)} + | "application/vnd.oasis.opendocument.text"i %{c(MIME_ODT)} + | "application/vnd.oasis.opendocument.presentation"i %{c(MIME_ODP)} + | "application/vnd.oasis.opendocument.spreadsheet"i %{c(MIME_ODS)} + | "application/vnd.oasis.opendocument.graphics"i %{c(MIME_ODG)} + | "image/x-ms-bmp"i %{c(MIME_IMAGE_BMP)} + | "image/bmp"i %{c(MIME_IMAGE_BMP)} + | "audio/x-wav"i %{c(MIME_WAV)} + | ( "application/x-tar"i | "application/x-ustar"i | "application/x-gtar"i | "application/zip"i | "application/x-archive"i + | "application/x-bzip2"i | "application/x-rar"i ) %{c(MIME_ARCHIVE)} + | "application/x-dosexec"i %{c(MIME_EXE)} + | "application/x-gzip"i %{c(MIME_GZIP)} + | "application/json"i %{c(MIME_JSON)} + | ("application/javascript"i | "text/javascript"i) %{c(MIME_JAVASCRIPT)} + | "application/vnd.android.package-archive"i %{c(MIME_APK)} + | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)} + ; + + +charset_name = token_char+ >clear_buf $update_buf; +mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2 + | token ws* '=' ws* '"'? token '"'? 
@1 + | text $0; +mime_parms = (lws ';' lws mime_param)*; +content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime; + +################# last modified ################### +action beg_modtime { guard(base_hd->http_time); } +action set_modtime { + setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1)); +} + +last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime; + +################# location ######################## +action set_location { + while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) { + buflen --; + } + if (hd && buflen < FETCHER_URL_MAX) { + hd->location = TStringBuf(buf, buflen); + } +} + +action set_status_303{ if (hd) hd->http_status = 303; } + +url = url_char+ >clear_buf $update_buf; +loc_url = any_text_char+ >clear_buf $update_buf; +location = "location"i def loc_url eoh %set_location; +refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; + +################# x-robots-tag ################ +action set_x_robots { + if (hd && AcceptingXRobots) { + if (I > 0) + hd->x_robots_tag |= I; + + int pos = (I > 0 ? I : -I); + for (size_t i = 0; i < 5; ++i) + if (abs(pos) & (1 << i)) // permissive flags take priority + hd->x_robots_state[i] = (I < 0) ? '1' : (hd->x_robots_state[i] != '1') ? 
'0' : '1'; + } +} + +action accept_x_robots { + AcceptingXRobots = (bool)I; +} + +x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)} + | "noindex"i %{c(1)} | "index"i %{c(-1)} + | "nofollow"i %{c(2)} | "follow"i %{c(-2)} + | "noarchive"i %{c(4)} | "archive"i %{c(-4)} + | "noyaca"i %{c(16)} + | "noodp"i %{c(8)}; + +any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*; +any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*; + +unavailable_after_directive = "unavailable_after"i def any_value; + +yandex_robot = "yandex"i | "yandexbot"i; +other_robot = any_key - "unavailable_after"i - yandex_robot; +robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)}; + +x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? ); + +x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh; + +################# rel_canonical ############### +action set_canonical { + if (hd && buflen < FETCHER_URL_MAX) { + hd->rel_canonical = TStringBuf(buf, buflen); + } +} + +rel_canonical = "link"i def '<' url ">;"i lws "rel"i lws '=' lws "\"canonical\"" eoh %set_canonical; +################# hreflang ############### +action set_hreflang { + bool first = (hreflangpos == hd->hreflangs); + size_t len2 = (first ? 
0 : 1) + langlen + 1 + buflen; + if (langlen && len2 < hreflangspace) { + if (!first) { + *(hreflangpos++) = '\t'; + } + memcpy(hreflangpos, langstart, langlen); + hreflangpos += langlen; + *(hreflangpos++) = ' '; + memcpy(hreflangpos, buf, buflen); + hreflangpos += buflen; + *(hreflangpos) = 0; + hreflangspace -= len2; + } +} + +action start_lang { + langstart = fpc; + langlen = 0; +} +action end_lang { + langlen = fpc - langstart; +} +hreflang_token = (token_char - ['])+; +quote = ['"]?; #" +lang = hreflang_token >start_lang %end_lang; + +hreflang = "link"i def '<' url '>' lws ";" lws + ( ( "rel"i lws '=' lws quote "alternate" quote lws ';' lws "hreflang"i lws '=' lws quote lang quote ) + | ( "hreflang"i lws '=' lws quote lang quote lws ';' lws "rel"i lws '=' lws quote "alternate" quote ) ) + eoh %set_hreflang; +################# squid_error ################# +action set_squid_error { + hd->squid_error = 1; +} + +squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; + +################# auth ######################## +action init_auth { + if (auth_hd) + auth_hd->use_auth=true; +} + +action update_auth_buf + { if (auth_hd && buflen < sizeof(buf)) buf[buflen++] = *fpc; } + +quoted_str = /"/ (text_char - /"/)* /"/ >2; +auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) > 2; + +# do not support auth-int, too heavy procedure + +qop_auth_option = "auth"i @1 %{if(auth_hd) auth_hd->qop_auth = true; }; + +qop_option = ( qop_auth_option @1 ) | (( token-"auth"i) $0 ); + +auth_good_param = ( "nonce"i /=/ auth_quoted_str ) + %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { + buf[buflen++] = 0; + auth_hd->nonce = strdup((const char*)buf); + }} + | ( "realm"i /=/ auth_quoted_str ) + %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { + buf[buflen++] = 0; + auth_hd->realm = strdup((const char*)buf); + }} + | ( "opaque"i /=/ auth_quoted_str ) + %{if (auth_hd && buflen < FETCHER_URL_MAX-1) { + buf[buflen++] = 0; + auth_hd->opaque = 
strdup((const char*)buf); + }} + | "stale"i /=/ "true"i + %{if (auth_hd) auth_hd->stale = true; } + | "algorithm"i /=/ "md5"i /-/ "sess"i + %{if (auth_hd) auth_hd->algorithm = 1; } + | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/); + +auth_param = auth_good_param @1 | + ( (token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) ) + /=/ (token | quoted_str ) ) $0; + +auth_params = auth_param ( ws* /,/ ws* auth_param )*; + +digest_challenge = ("digest"i %init_auth ws+ auth_params) | + ((token-"digest"i) text); + +auth = "www-authenticate"i def digest_challenge eoh; + +###################### host ####################### +action set_host { + if (request_hd && buflen < HOST_MAX) { + buf[buflen++] = 0; + if (request_hd->host[0] != 0) { + return -2; + } + memcpy(request_hd->host, buf, buflen); + } +} + +host = (url_char | [:])* >clear_buf $update_buf; +host_header = "host"i def host eoh %set_host; + +###################### from ####################### +action set_from { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->from[0] != 0) { + return -2; + } + memcpy(request_hd->from, buf, buflen); + } +} + +mailbox = (token "@" token) >clear_buf $update_buf; +from_header = "from"i def mailbox eoh %set_from; + +################### user-agent #################### +action set_user_agent { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->user_agent[0] != 0) { + return -2; + } + memcpy(request_hd->user_agent, buf, buflen); + } +} + +user_agent = any_text_char* >clear_buf $update_buf; +user_agent_header = "user-agent"i def user_agent eoh %set_user_agent; + +############### x-yandex-langregion ################ +action set_langregion { + if (request_hd && buflen < MAX_LANGREGION_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_langregion[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_langregion, buf, buflen); + } +} + +langregion = any_text_char* >clear_buf $update_buf; +langregion_header = 
"x-yandex-langregion"i def langregion eoh %set_langregion; + +############### x-yandex-sourcename ################ +action set_sourcename { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_sourcename[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_sourcename, buf, buflen); + } +} + +sourcename = any_text_char* >clear_buf $update_buf; +sourcename_header = "x-yandex-sourcename"i def sourcename eoh %set_sourcename; + +############### x-yandex-requesttype ############### +action set_requesttype { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_requesttype[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_requesttype, buf, buflen); + } +} + +requesttype = any_text_char* >clear_buf $update_buf; +requesttype_header = "x-yandex-requesttype"i def requesttype eoh %set_requesttype; + +################ x-yandex-fetchoptions ############### +action set_fetchoptions { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_fetchoptions[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_fetchoptions, buf, buflen); + } +} + +fetchoptions = any_text_char* >clear_buf $update_buf; +fetchoptions_header = "x-yandex-fetchoptions"i def fetchoptions eoh %set_fetchoptions; + +################ if-modified-since ################ +action set_if_modified_since { + if (request_hd) { + request_hd->if_modified_since = DateTimeFields.ToTimeT(-1); + } +} + +if_modified_since = "if-modified-since"i def http_date eoh + %set_if_modified_since; + +################ retry-after ################ +action set_retry_after_withdate { + if (hd) { + hd->retry_after = DateTimeFields.ToTimeT(-1); + } +} + +action set_retry_after_withdelta { + if (hd) { + hd->retry_after = TInstant::Now().Seconds() + I; + } +} + +retry_after_withdate = "retry-after"i def http_date eoh + %set_retry_after_withdate; +retry_after_withdelta = "retry-after"i def int eoh + 
%set_retry_after_withdelta; + +############## request-cache-control ############## +action SETMAXAGE { if (request_hd) request_hd->max_age = I; } + +delta_seconds = int; +cache_extension = token ("=" (token | quoted_str))?; +request_cache_directive = "no-cache"i + | "no-store"i + | ("max-age"i "=" delta_seconds %SETMAXAGE) + | ("max-stale"i ("=" delta_seconds)?) + | ("min-fresh"i "=" delta_seconds) + | "non-transform"i + | "only-if-cached"i + | cache_extension; +request_cache_control = "cache-control"i def request_cache_directive eoh; + +############ x-yandex-response-timeout ############# + +action set_response_timeout { + if (request_hd) { + request_hd->x_yandex_response_timeout = I; + } +} + +response_timeout = "x-yandex-response-timeout"i def int eoh + %set_response_timeout; + +############ x-yandex-request-priority ############# + +action set_request_priority { + if (request_hd) { + request_hd->x_yandex_request_priority = I; + } +} + +request_priority = "x-yandex-request-priority"i def int eoh + %set_request_priority; + +################# message header ################## +other_header = ( ex_token - "www-authenticate"i ) def any_text eoh; +message_header = other_header $0 + | connection @1 + | content_encoding @1 + | transfer_encoding @1 + | content_length @1 + | content_type @1 + | last_modified @1 + | refresh @1 + | content_range @1; +response_header = message_header $0 + | auth @1 + | accept_ranges @1 + | location @1 + | x_robots_tag @1 + | rel_canonical @1 + | hreflang @1 + | squid_error @1 + | retry_after_withdate @1 + | retry_after_withdelta @1; +request_header = message_header $0 + | from_header @1 + | host_header @1 + | user_agent_header @1 + | sourcename_header @1 + | requesttype_header @1 + | langregion_header @1 + | fetchoptions_header @1 + | if_modified_since @1 + | request_cache_control @1 + | response_timeout @1 + | request_priority @1; + +################# main ############################ +action accepted { lastchar = (char*)fpc; return 2; } + 
+main := ((response_status_line ('\r'? response_header)*) + | (request_status_line ('\r' ? request_header)*)) + eol @accepted; + +}%% + +%% write data; + +int THttpHeaderParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_header_parser_error) + return -1; + else if (cs == http_header_parser_first_final) + return 0; + else + return 1; +} + +void THttpHeaderParser::init() { + %% write init; +} + +%%{ +machine http_chunk_parser; + +alphtype unsigned char; + +action clear_hex { cnt64 = 0; } +action update_hex { cnt64 = 16 * cnt64 + X(fc); if(cnt64 > Max<int>()) return -2; } +action set_chunk { chunk_length = static_cast<int>(cnt64); } +action accepted { lastchar = (char*)fpc; return 2; } + +eol = '\r'? '\n'; +ws = [ \t]; +sp = ' '; +lw = '\r'? '\n'? ws; +separator = [()<>@,;:\\"/\[\]?={}]; +token_char = [!-~] - separator; # http tokens chars +url_char = [!-~] - ["<>\[\]\\^`{}|]; # uric chars +text_char = ws | 33..127 | 160..255; + +lws = lw*; +eoh = lws eol; +token = token_char+; +text = (text_char | lw)*; +def = lws ':' lws; + +hex = (xdigit+) >clear_hex $update_hex; +quoted_string = '"' ((text_char - '"') $0 | '\\"' @1)* '"'; + +chunk_ext_val = token | quoted_string; +chunk_ext_name = token; +chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*; + +entity_header = token def text eoh; +trailer = entity_header*; + +chunk = (hex - '0'+) chunk_extension? %set_chunk; +last_chunk = '0'+ chunk_extension? 
eol trailer; +main := eol (chunk $0 | last_chunk @1) eol @accepted; + +}%% + +%% write data; + +int THttpChunkParser::execute(unsigned char *inBuf, int len) { + const unsigned char *p = inBuf; + const unsigned char *pe = p + len; + %% write exec; + if (cs == http_chunk_parser_error) + return -1; + else if (cs == http_chunk_parser_first_final) + return 0; + else + return 1; +} + +void THttpChunkParser::init() { + chunk_length = 0; + %% write init; +} diff --git a/library/cpp/http/fetch/httpfsm_ut.cpp b/library/cpp/http/fetch/httpfsm_ut.cpp new file mode 100644 index 0000000000..b018e80101 --- /dev/null +++ b/library/cpp/http/fetch/httpfsm_ut.cpp @@ -0,0 +1,591 @@ +#include "httpfsm.h" +#include "library-htfetch_ut_hreflang_in.h" +#include "library-htfetch_ut_hreflang_out.h" + +#include <util/generic/ptr.h> +#include <library/cpp/charset/doccodes.h> +#include <library/cpp/testing/unittest/registar.h> + +class THttpHeaderParserTestSuite: public TTestBase { + UNIT_TEST_SUITE(THttpHeaderParserTestSuite); + UNIT_TEST(TestRequestHeader); + UNIT_TEST(TestSplitRequestHeader); + UNIT_TEST(TestTrailingData); + UNIT_TEST(TestProxyRequestHeader); + UNIT_TEST(TestIncorrectRequestHeader); + UNIT_TEST(TestLastModified); + UNIT_TEST(TestLastModifiedCorrupted); + UNIT_TEST(TestResponseHeaderOnRequest); + UNIT_TEST(TestRequestHeaderOnResponse); + UNIT_TEST(TestXRobotsTagUnknownTags); + UNIT_TEST(TestXRobotsTagMyBot); + UNIT_TEST(TestXRobotsTagOtherBot); + UNIT_TEST(TestXRobotsTagUnavailableAfterAware); + UNIT_TEST(TestXRobotsTagUnavailableAfterWorks); + UNIT_TEST(TestXRobotsTagOverridePriority); + UNIT_TEST(TestXRobotsTagDoesNotBreakCharset); + UNIT_TEST(TestXRobotsTagAllowsMultiline); + UNIT_TEST(TestRelCanonical); + UNIT_TEST(TestHreflang); + UNIT_TEST(TestHreflangOnLongInput); + UNIT_TEST(TestMimeType); + UNIT_TEST(TestRepeatedContentEncoding); + UNIT_TEST_SUITE_END(); + +private: + THolder<THttpHeaderParser> httpHeaderParser; + +private: + void TestStart(); + void TestFinish(); + 
+public: + void TestRequestHeader(); + void TestSplitRequestHeader(); + void TestTrailingData(); + void TestProxyRequestHeader(); + void TestIncorrectRequestHeader(); + void TestLastModified(); + void TestLastModifiedCorrupted(); + void TestResponseHeaderOnRequest(); + void TestRequestHeaderOnResponse(); + void TestXRobotsTagUnknownTags(); + void TestXRobotsTagMyBot(); + void TestXRobotsTagOtherBot(); + void TestXRobotsTagUnavailableAfterAware(); + void TestXRobotsTagUnavailableAfterWorks(); + void TestXRobotsTagOverridePriority(); + void TestXRobotsTagDoesNotBreakCharset(); + void TestXRobotsTagAllowsMultiline(); + void TestRelCanonical(); + void TestHreflang(); + void TestHreflangOnLongInput(); + void TestMimeType(); + void TestRepeatedContentEncoding(); +}; + +void THttpHeaderParserTestSuite::TestStart() { + httpHeaderParser.Reset(new THttpHeaderParser()); +} + +void THttpHeaderParserTestSuite::TestFinish() { + httpHeaderParser.Reset(); +} + +void THttpHeaderParserTestSuite::TestRequestHeader() { + TestStart(); + THttpRequestHeader httpRequestHeader; + httpHeaderParser->Init(&httpRequestHeader); + const char* request = "GET /search?q=hi HTTP/1.1\r\n" + "Host: www.google.ru:8080\r\n\r\n"; + i32 result = httpHeaderParser->Execute(request, strlen(request)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0); + UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi"); + UNIT_ASSERT_EQUAL(httpRequestHeader.GetUrl(), "http://www.google.ru:8080/search?q=hi"); + UNIT_ASSERT_EQUAL(httpHeaderParser->lastchar - request + 1, + (i32)strlen(request)); + UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_response_timeout, + DEFAULT_RESPONSE_TIMEOUT); + UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority, + DEFAULT_REQUEST_PRIORITY); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, ""), 0); + 
UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, ""), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, ""), 0); + TestFinish(); + UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, DEFAULT_MAX_AGE); +} + +void THttpHeaderParserTestSuite::TestSplitRequestHeader() { + TestStart(); + const char* request = + "GET /search?q=hi HTTP/1.1\r\n" + "Host: www.google.ru:8080 \r\n" + "\r\n"; + const size_t rlen = strlen(request); + + for (size_t n1 = 0; n1 < rlen; n1++) { + for (size_t n2 = n1; n2 < rlen; n2++) { + TString s1{request, 0, n1}; + TString s2{request, n1, n2 - n1}; + TString s3{request, n2, rlen - n2}; + UNIT_ASSERT_EQUAL(s1 + s2 + s3, request); + + THttpRequestHeader httpRequestHeader; + UNIT_ASSERT(0 == httpHeaderParser->Init(&httpRequestHeader)); + i32 result = httpHeaderParser->Execute(s1); + UNIT_ASSERT_EQUAL(result, 1); + result = httpHeaderParser->Execute(s2); + UNIT_ASSERT_EQUAL(result, 1); + result = httpHeaderParser->Execute(s3); + UNIT_ASSERT_EQUAL(result, 2); + + UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0); + UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi"); + } + } + + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestTrailingData() { + TestStart(); + THttpRequestHeader httpRequestHeader; + UNIT_ASSERT(0 == httpHeaderParser->Init(&httpRequestHeader)); + const char* request = + "GET /search?q=hi HTTP/1.1\r\n" + "Host: www.google.ru:8080\r\n" + "\r\n" + "high.ru"; + i32 result = httpHeaderParser->Execute(request, strlen(request)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0); + UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi"); + UNIT_ASSERT_EQUAL(TString(httpHeaderParser->lastchar + 1), "high.ru"); + UNIT_ASSERT_EQUAL(httpRequestHeader.http_minor, 1); + 
UNIT_ASSERT_EQUAL(httpRequestHeader.transfer_chunked, -1); + UNIT_ASSERT_EQUAL(httpRequestHeader.content_length, -1); + UNIT_ASSERT_EQUAL(httpRequestHeader.connection_closed, -1); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestProxyRequestHeader() { + TestStart(); + THttpRequestHeader httpRequestHeader; + httpHeaderParser->Init(&httpRequestHeader); + const char* request = + "GET http://www.google.ru:8080/search?q=hi HTTP/1.1\r\n" + "X-Yandex-Response-Timeout: 1000\r\n" + "X-Yandex-Request-Priority: 2\r\n" + "X-Yandex-Sourcename: orange\r\n" + "X-Yandex-Requesttype: userproxy\r\n" + "X-Yandex-FetchOptions: d;c\r\n" + "Cache-control: max-age=100\r\n" + "If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT\r\n" + "User-Agent: Yandex/1.01.001 (compatible; Win16; I)\r\n" + "From: webadmin@yandex.ru\r\n\r\n"; + i32 result = httpHeaderParser->Execute(request, strlen(request)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET); + UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_response_timeout, 1000); + UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority, 2); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, "orange"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, "userproxy"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, "d;c"), 0); + UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, 100); + UNIT_ASSERT_VALUES_EQUAL(httpRequestHeader.if_modified_since, + TInstant::ParseIso8601Deprecated("1994-10-29 19:43:31Z").TimeT()); + UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, + "http://www.google.ru:8080/search?q=hi"); + UNIT_ASSERT(httpRequestHeader.GetUrl() == + "http://www.google.ru:8080/search?q=hi"); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, ""), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.from, "webadmin@yandex.ru"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.user_agent, + "Yandex/1.01.001 (compatible; Win16; I)"), + 0); + 
UNIT_ASSERT_EQUAL(httpHeaderParser->lastchar - request + 1, + (i32)strlen(request)); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestIncorrectRequestHeader() { + TestStart(); + THttpRequestHeader httpRequestHeader; + httpHeaderParser->Init(&httpRequestHeader); + const char* request = "GET /search?q=hi HTP/1.1\r\n" + "Host: www.google.ru:8080\r\n\r\n"; + i32 result = httpHeaderParser->Execute(request, strlen(request)); + UNIT_ASSERT(result != 2); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestLastModified() { + TestStart(); + THttpHeader h; + UNIT_ASSERT(0 == httpHeaderParser->Init(&h)); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "Last-Modified: Thu, 13 Aug 2009 14:27:08 GMT\r\n\r\n"; + UNIT_ASSERT(2 == httpHeaderParser->Execute(headers, strlen(headers))); + UNIT_ASSERT_VALUES_EQUAL( + TInstant::ParseIso8601Deprecated("2009-08-13 14:27:08Z").TimeT(), + h.http_time); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestLastModifiedCorrupted() { + TestStart(); + THttpHeader h; + UNIT_ASSERT(0 == httpHeaderParser->Init(&h)); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "Last-Modified: Thu, 13 Aug 2009 14:\r\n\r\n"; + UNIT_ASSERT(2 == httpHeaderParser->Execute(headers, strlen(headers))); + UNIT_ASSERT(h.http_time < 0); // XXX: don't understand what is the proper value + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); + TestFinish(); +} + +void 
THttpHeaderParserTestSuite::TestXRobotsTagMyBot() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: yandex: noindex, nofollow\r\n" + "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: google: noindex, nofollow\r\n" + "x-robots-tag: googlebot: noarchive, noodp\r\n" + "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + // проверяем только что unavailable_after ничего не ломает + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n" + "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() { + TestStart(); + THttpHeader httpHeader; + 
httpHeaderParser->Init(&httpHeader); + // пока не поддерживается + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1); + //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: all, none\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "11xxx"); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); // NOTE legacy behavior, should be 0 as `all` overrides + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "X-Robots-Tag: noarchive\r\n" + "Content-Type: application/json; charset=utf-8\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); + UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "X-Robots-Tag\r\n" + " :\r\n" + " unavailable_since\r\n" + " :\r\n" + " ,\r\n" + " unavailable_since\r\n" + " :\r\n" + " 01 Jan 2000\r\n" + " 00:00 UTC\r\n" + " ,\r\n" + " yandexbot\r\n" + " :\r\n" + " noindex\r\n" + " ,\r\n" + " garbage\r\n" + " ,\r\n" 
+ " nofollow\r\n" + " ,\r\n" + " other\r\n" + " bot\r\n" + " :\r\n" + " noarchive\r\n" + " ,\r\n" + "Content-Type: application/json; charset=utf-8\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); + UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); + UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestHreflang() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "link: <http://www.high.ru/>; rel='alternate'; hreflang='x-default'\r\n" + "link: <http://www.high.ru/en.html> ;rel = 'alternate' ;hreflang = en_GB \r\n" + "link: <http://www.high.ru/ru.html>;hreflang = ru_RU.KOI8-r ;rel = 'alternate' \r\n" + "\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_VALUES_EQUAL(result, 2); + // UNIT_ASSERT_VALUES_EQUAL(strcmp(httpHeader.hreflangs, "x-default http://www.high.ru/;"), 0); + UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, "x-default http://www.high.ru/\ten_GB http://www.high.ru/en.html\tru_RU.KOI8-r http://www.high.ru/ru.html"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestHreflangOnLongInput() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + TStringBuf testInput(hreflang_ut_in); + TStringBuf testOut(hreflang_ut_out); + i32 result = httpHeaderParser->Execute(testInput.data(), testInput.size()); + UNIT_ASSERT_VALUES_EQUAL(result, 2); + UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, testOut); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestRelCanonical() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "Link: <http://yandex.ru>; rel = 
\"canonical\"\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.rel_canonical, "http://yandex.ru"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestResponseHeaderOnRequest() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* request = "GET /search?q=hi HTP/1.1\r\n" + "Host: www.google.ru:8080\r\n\r\n"; + i32 result = httpHeaderParser->Execute(request, strlen(request)); + UNIT_ASSERT_EQUAL(result, -3); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestRequestHeaderOnResponse() { + TestStart(); + THttpRequestHeader httpRequestHeader; + httpHeaderParser->Init(&httpRequestHeader); + const char* response = "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "Last-Modified: Thu, 13 Aug 2009 14:\r\n\r\n"; + i32 result = httpHeaderParser->Execute(response, strlen(response)); + UNIT_ASSERT_EQUAL(result, -3); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestMimeType() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: application/json; charset=utf-8\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); + UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestRepeatedContentEncoding() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char *headers = + "HTTP/1.1 200 OK\r\n" + "Server: nginx\r\n" + "Date: Mon, 15 Oct 2018 10:40:44 GMT\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "Connection: keep-alive\r\n" + "Last-Modified: Mon, 15 Oct 2018 03:48:54 GMT\r\n" + "ETag: W/\"5bc40e26-a956d\"\r\n" + "X-Autoru-LB: lb-03-sas.prod.vertis.yandex.net\r\n" + 
"Content-Encoding: gzip\r\n" + "Content-Encoding: gzip\r\n" + "X-UA-Bot: 1\r\n" + "\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.error, 0); + UNIT_ASSERT_EQUAL(httpHeader.compression_method, 3); + TestFinish(); +} + +UNIT_TEST_SUITE_REGISTRATION(THttpHeaderParserTestSuite); + +Y_UNIT_TEST_SUITE(TestHttpChunkParser) { + static THttpChunkParser initParser() { + THttpChunkParser parser; + parser.Init(); + return parser; + } + + static THttpChunkParser parseByteByByte(const TStringBuf& blob, const TVector<int>& states) { + UNIT_ASSERT(states.size() <= blob.size()); + THttpChunkParser parser{initParser()}; + for (size_t n = 0; n < states.size(); n++) { + const TStringBuf d{blob, n, 1}; + int code = parser.Execute(d.data(), d.size()); + Cout << TString(d).Quote() << " " << code << Endl; + UNIT_ASSERT_EQUAL(code, states[n]); + } + return parser; + } + + static THttpChunkParser parseBytesWithLastState(const TStringBuf& blob, const int last_state) { + TVector<int> states(blob.size() - 1, 1); + states.push_back(last_state); + return parseByteByByte(blob, states); + } + + Y_UNIT_TEST(TestWithoutEolHead) { + const TStringBuf blob{ + "4\r\n" + "____\r\n"}; + TVector<int> states{ + -1, /* 1, -1, + 1, -1, 1, -1, 1, -1 */}; + // as soon as error happens parser state should be considered + // undefined, state is meaningless after the very first `-1` + // moreover, testenv produces `states[1] == -1` for this input and + // my local build produces `states[1] == 1`. 
+ parseByteByByte(blob, states); + } + + Y_UNIT_TEST(TestTrivialChunk) { + const TStringBuf blob{ + "\r\n" + "4\r\n"}; + THttpChunkParser parser(parseBytesWithLastState(blob, 2)); + UNIT_ASSERT_EQUAL(parser.chunk_length, 4); + UNIT_ASSERT_EQUAL(parser.cnt64, 4); + } + + Y_UNIT_TEST(TestNegative) { + const TStringBuf blob{ + "\r\n" + "-1"}; + TVector<int> states{ + 1, 1, + -1, + /* 1 */}; + parseByteByByte(blob, states); + } + + Y_UNIT_TEST(TestLeadingZero) { + const TStringBuf blob{ + "\r\n" + "042\r\n"}; + THttpChunkParser parser(parseBytesWithLastState(blob, 2)); + UNIT_ASSERT_EQUAL(parser.chunk_length, 0x42); + } + + Y_UNIT_TEST(TestIntOverflow) { + const TStringBuf blob{ + "\r\n" + "deadbeef"}; + THttpChunkParser parser(parseBytesWithLastState(blob, -2)); + UNIT_ASSERT_EQUAL(parser.chunk_length, 0); + UNIT_ASSERT_EQUAL(parser.cnt64, 0xdeadbeef); + } + + Y_UNIT_TEST(TestTrivialChunkWithTail) { + const TStringBuf blob{ + "\r\n" + "4\r\n" + "_" // first byte of the chunk + }; + TVector<int> states{ + 1, 1, + 1, 1, 2, + -1}; + parseByteByByte(blob, states); + } + + Y_UNIT_TEST(TestLastChunk) { + // NB: current parser does not permit whitespace before `foo`, + // but I've never seen the feature in real-life traffic + const TStringBuf blob{ + "\r\n" + "000 ;foo = bar \r\n" + "Trailer: bar\r\n" + "\r\n"}; + THttpChunkParser parser(parseBytesWithLastState(blob, 2)); + UNIT_ASSERT_EQUAL(parser.chunk_length, 0); + } +} diff --git a/library/cpp/http/fetch/httpheader.cpp b/library/cpp/http/fetch/httpheader.cpp new file mode 100644 index 0000000000..7d2225b8b7 --- /dev/null +++ b/library/cpp/http/fetch/httpheader.cpp @@ -0,0 +1,7 @@ +#include "httpheader.h" + +const i64 DEFAULT_RETRY_AFTER = -1; +const i64 DEFAULT_IF_MODIFIED_SINCE = -1; +const i32 DEFAULT_MAX_AGE = -1; +const i8 DEFAULT_REQUEST_PRIORITY = -1; +const i32 DEFAULT_RESPONSE_TIMEOUT = -1; diff --git a/library/cpp/http/fetch/httpheader.h b/library/cpp/http/fetch/httpheader.h new file mode 100644 index 
0000000000..b2810bbd41 --- /dev/null +++ b/library/cpp/http/fetch/httpheader.h @@ -0,0 +1,287 @@ +#pragma once + +#include "exthttpcodes.h" + +#include <library/cpp/mime/types/mime.h> + +#include <util/system/defaults.h> +#include <util/system/compat.h> +#include <util/generic/string.h> +#include <util/generic/ylimits.h> +#include <util/system/maxlen.h> + +#include <ctime> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <algorithm> + +// This is ugly solution but here a lot of work to do it the right way. +#define FETCHER_URL_MAX 8192 + +extern const i64 DEFAULT_RETRY_AFTER; /// == -1 +extern const i64 DEFAULT_IF_MODIFIED_SINCE; /// == -1 +extern const i32 DEFAULT_MAX_AGE; /// == -1 +extern const i8 DEFAULT_REQUEST_PRIORITY; /// == -1 +extern const i32 DEFAULT_RESPONSE_TIMEOUT; /// == -1 + +#define HTTP_PREFIX "http://" +#define MAX_LANGREGION_LEN 4 +#define MAXWORD_LEN 55 + +enum HTTP_COMPRESSION { + HTTP_COMPRESSION_UNSET = 0, + HTTP_COMPRESSION_ERROR = 1, + HTTP_COMPRESSION_IDENTITY = 2, + HTTP_COMPRESSION_GZIP = 3, + HTTP_COMPRESSION_DEFLATE = 4, + HTTP_COMPRESSION_COMPRESS = 5, + HTTP_COMPRESSION_MAX = 6 +}; + +enum HTTP_METHOD { + HTTP_METHOD_UNDEFINED = -1, + HTTP_METHOD_OPTIONS, + HTTP_METHOD_GET, + HTTP_METHOD_HEAD, + HTTP_METHOD_POST, + HTTP_METHOD_PUT, + HTTP_METHOD_DELETE, + HTTP_METHOD_TRACE, + HTTP_METHOD_CONNECT, + HTTP_METHOD_EXTENSION +}; + +enum HTTP_CONNECTION { + HTTP_CONNECTION_UNDEFINED = -1, + HTTP_CONNECTION_KEEP_ALIVE = 0, + HTTP_CONNECTION_CLOSE = 1 +}; + +/// Class represents general http header fields. 
+struct THttpBaseHeader { +public: + i16 error; + i32 header_size; + i32 entity_size; + i64 content_length; + i64 http_time; // seconds since epoch + i64 content_range_start; // Content-Range: first-byte-pos + i64 content_range_end; // Content-Range: last-byte-pos + i64 content_range_entity_length; // Content-Range: entity-length + i8 http_minor; + i8 mime_type; + i8 charset; + i8 compression_method; + i8 transfer_chunked; + i8 connection_closed; + TString base; + +public: + void Init() { + error = 0; + header_size = 0; + entity_size = 0; + content_length = -1; + http_time = -1; + http_minor = -1; + mime_type = -1; + charset = -1; + compression_method = HTTP_COMPRESSION_UNSET; + transfer_chunked = -1; + connection_closed = HTTP_CONNECTION_UNDEFINED; + content_range_start = -1; + content_range_end = -1; + content_range_entity_length = -1; + base.clear(); + } + + void Print() const { + printf("content_length: %" PRIi64 "\n", content_length); + printf("http_time: %" PRIi64 "\n", http_time); + printf("http_minor: %" PRIi8 "\n", http_minor); + printf("mime_type: %" PRIi8 "\n", mime_type); + printf("charset: %" PRIi8 "\n", charset); + printf("compression_method: %" PRIi8 "\n", compression_method); + printf("transfer_chunked: %" PRIi8 "\n", transfer_chunked); + printf("connection_closed: %" PRIi8 "\n", connection_closed); + printf("content_range_start: %" PRIi64 "\n", content_range_start); + printf("content_range_end: %" PRIi64 "\n", content_range_end); + printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length); + printf("base: \"%s\"\n", base.c_str()); + printf("error: %" PRIi16 "\n", error); + } + + int SetBase(const char* path, + const char* hostNamePtr = nullptr, + int hostNameLength = 0) { + if (*path == '/') { + base = "http://"; + base += TStringBuf(hostNamePtr, hostNameLength); + base += path; + } else { + base = path; + } + return error; + } +}; + +enum { HREFLANG_MAX = FETCHER_URL_MAX * 2 }; +/// Class represents Http Response Header. 
+struct THttpHeader: public THttpBaseHeader { +public: + i8 accept_ranges; + i8 squid_error; + i8 x_robots_tag; // deprecated, use x_robots_state instead + i16 http_status; + TString location; + TString rel_canonical; + char hreflangs[HREFLANG_MAX]; + i64 retry_after; + TString x_robots_state; // 'xxxxx' format, see `library/html/zoneconf/parsefunc.cpp` + +public: + void Init() { + THttpBaseHeader::Init(); + accept_ranges = -1; + squid_error = 0; + x_robots_tag = 0; + rel_canonical.clear(); + http_status = -1; + location.clear(); + hreflangs[0] = 0; + retry_after = DEFAULT_RETRY_AFTER; + x_robots_state = "xxxxx"; + } + + void Print() const { + THttpBaseHeader::Print(); + printf("http_status: %" PRIi16 "\n", http_status); + printf("squid_error: %" PRIi8 "\n", squid_error); + printf("accept_ranges: %" PRIi8 "\n", accept_ranges); + printf("location: \"%s\"\n", location.c_str()); + printf("retry_after: %" PRIi64 "\n", retry_after); + } +}; + +struct THttpRequestHeader: public THttpBaseHeader { +public: + TString request_uri; + char host[HOST_MAX]; + char from[MAXWORD_LEN]; + char user_agent[MAXWORD_LEN]; + char x_yandex_langregion[MAX_LANGREGION_LEN]; + char x_yandex_sourcename[MAXWORD_LEN]; + char x_yandex_requesttype[MAXWORD_LEN]; + char x_yandex_fetchoptions[MAXWORD_LEN]; + i8 http_method; + i8 x_yandex_request_priority; + i32 x_yandex_response_timeout; + i32 max_age; + i64 if_modified_since; + +public: + THttpRequestHeader() { + Init(); + } + + void Init() { + request_uri.clear(); + host[0] = 0; + from[0] = 0; + user_agent[0] = 0; + x_yandex_langregion[0] = 0; + x_yandex_sourcename[0] = 0; + x_yandex_requesttype[0] = 0; + x_yandex_fetchoptions[0] = 0; + http_method = HTTP_METHOD_UNDEFINED; + x_yandex_request_priority = DEFAULT_REQUEST_PRIORITY; + x_yandex_response_timeout = DEFAULT_RESPONSE_TIMEOUT; + max_age = DEFAULT_MAX_AGE; + if_modified_since = DEFAULT_IF_MODIFIED_SINCE; + THttpBaseHeader::Init(); + } + + void Print() const { + THttpBaseHeader::Print(); + 
printf("request_uri: \"%s\"\n", request_uri.c_str()); + printf("host: \"%s\"\n", host); + printf("from: \"%s\"\n", from); + printf("user_agent: \"%s\"\n", user_agent); + printf("http_method: %" PRIi8 "\n", http_method); + printf("response_timeout: %" PRIi32 "\n", x_yandex_response_timeout); + printf("max_age: %" PRIi32 "\n", max_age); + printf("if_modified_since: %" PRIi64 "\n", if_modified_since); + } + + /// It doesn't care about errors in request or headers, where + /// request_uri equals to '*'. + /// This returns copy of the string, which you have to delete. + TString GetUrl() { + TString url; + if (host[0] == 0 || !strcmp(host, "")) { + url = request_uri; + } else { + url = HTTP_PREFIX; + url += host; + url += request_uri; + } + return url; + } + + char* GetUrl(char* buffer, size_t size) { + if (host[0] == 0 || !strcmp(host, "")) { + strlcpy(buffer, request_uri.c_str(), size); + } else { + snprintf(buffer, size, "http://%s%s", host, request_uri.c_str()); + } + return buffer; + } +}; + +class THttpAuthHeader: public THttpHeader { +public: + char* realm; + char* nonce; + char* opaque; + bool stale; + int algorithm; + bool qop_auth; + bool use_auth; + + //we do not provide auth-int variant as too heavy + //bool qop_auth_int; + + THttpAuthHeader() + : realm(nullptr) + , nonce(nullptr) + , opaque(nullptr) + , stale(false) + , algorithm(0) + , qop_auth(false) + , use_auth(true) + { + THttpHeader::Init(); + } + + ~THttpAuthHeader() { + free(realm); + free(nonce); + free(opaque); + } + + void Print() { + THttpHeader::Print(); + if (use_auth) { + if (realm) + printf("realm: \"%s\"\n", realm); + if (nonce) + printf("nonce: \"%s\"\n", nonce); + if (opaque) + printf("opaque: \"%s\"\n", opaque); + printf("stale: %d\n", stale); + printf("algorithm: %d\n", algorithm); + printf("qop_auth: %d\n", qop_auth); + } + } +}; diff --git a/library/cpp/http/fetch/httpload.cpp b/library/cpp/http/fetch/httpload.cpp new file mode 100644 index 0000000000..82ea8900b5 --- /dev/null +++ 
b/library/cpp/http/fetch/httpload.cpp @@ -0,0 +1,373 @@ +#include "httpload.h" + +/************************************************************/ +/************************************************************/ +httpAgentReader::httpAgentReader(httpSpecialAgent& agent, + const char* baseUrl, + bool assumeConnectionClosed, + bool use_auth, + int bufSize) + : Header_() + , Agent_(agent) + , Buffer_(new char[bufSize]) + , BufPtr_(Buffer_) + , BufSize_(bufSize) + , BufRest_(0) +{ + HeadRequest = false; + Header = &Header_; + if (use_auth) + HeaderParser.Init(&Header_); + else + HeaderParser.Init(Header); + setAssumeConnectionClosed(assumeConnectionClosed ? 1 : 0); + Header_.SetBase(baseUrl); + + if (Header_.error) + State = hp_error; + else + State = hp_in_header; +} + +/************************************************************/ +httpAgentReader::~httpAgentReader() { + delete[] Buffer_; +} + +/************************************************************/ +void httpAgentReader::readBuf() { + assert(BufRest_ == 0); + if (!BufPtr_) { + BufRest_ = -1; + return; + } + + BufRest_ = Agent_.read(Buffer_, BufSize_); + if (BufRest_ <= 0) { + BufRest_ = -1; + BufPtr_ = nullptr; + } else { + BufPtr_ = Buffer_; + + //cout << "BUF: " << mBuffer << endl << endl; + } +} + +/************************************************************/ +const THttpHeader* httpAgentReader::readHeader() { + while (State == hp_in_header) { + if (!step()) { + Header_.error = HTTP_CONNECTION_LOST; + return nullptr; + } + ParseGeneric(BufPtr_, BufRest_); + } + if (State == hp_eof || State == hp_error) { + BufPtr_ = nullptr; + BufRest_ = -1; + } + if (State == hp_error || Header_.error) + return nullptr; + return &Header_; +} + +/************************************************************/ +long httpAgentReader::readPortion(void*& buf) { + assert(State != hp_in_header); + + long Chunk = 0; + do { + if (BufSize_ == 0 && !BufPtr_) + return 0; + + if (!step()) + return 0; + + Chunk = ParseGeneric(BufPtr_, 
BufRest_); + buf = BufPtr_; + + if (State == hp_error && Header_.entity_size > Header_.content_length) { + Chunk -= (Header_.entity_size - Header_.content_length); + BufPtr_ = (char*)BufPtr_ + Chunk; + BufRest_ = 0; + State = hp_eof; + Header_.error = 0; + break; + } + + BufPtr_ = (char*)BufPtr_ + Chunk; + BufRest_ -= Chunk; + + if (State == hp_eof || State == hp_error) { + BufRest_ = -1; + BufPtr_ = nullptr; + } + } while (!Chunk); + return Chunk; +} + +/************************************************************/ +bool httpAgentReader::skipTheRest() { + void* b; + while (!eof()) + readPortion(b); + return (State == hp_eof); +} + +/************************************************************/ +/************************************************************/ +httpLoadAgent::httpLoadAgent(bool handleAuthorization, + socketHandlerFactory& factory) + : Factory_(factory) + , HandleAuthorization_(handleAuthorization) + , URL_() + , PersistentConn_(false) + , Reader_(nullptr) + , Headers_() + , ErrCode_(0) + , RealHost_(nullptr) +{ +} + +/************************************************************/ +httpLoadAgent::~httpLoadAgent() { + delete Reader_; + free(RealHost_); +} + +/************************************************************/ +void httpLoadAgent::clearReader() { + if (Reader_) { + bool opened = false; + if (PersistentConn_) { + const THttpHeader* H = Reader_->readHeader(); + if (H && !H->connection_closed) { + Reader_->skipTheRest(); + opened = true; + } + } + if (!opened) + Disconnect(); + delete Reader_; + Reader_ = nullptr; + } + ErrCode_ = 0; +} +/************************************************************/ +void httpLoadAgent::setRealHost(const char* hostname) { + free(RealHost_); + if (hostname) + RealHost_ = strdup(hostname); + else + RealHost_ = nullptr; + ErrCode_ = 0; +} + +/************************************************************/ +void httpLoadAgent::setIMS(const char* ifModifiedSince) { + char ims_buf[100]; + snprintf(ims_buf, 100, 
"If-Modified-Since: %s\r\n", + ifModifiedSince); + Headers_.push_back(ims_buf); +} + +/************************************************************/ +void httpLoadAgent::addHeaderInstruction(const char* instr) { + Headers_.push_back(instr); +} + +/************************************************************/ +void httpLoadAgent::dropHeaderInstructions() { + Headers_.clear(); +} + +/************************************************************/ +bool httpLoadAgent::startRequest(const THttpURL& url, + bool persistent, + const TAddrList& addrs) + +{ + clearReader(); + ErrCode_ = 0; + + URL_.Clear(); + URL_ = url; + PersistentConn_ = persistent; + if (!URL_.IsValidAbs()) + return false; + if (!HandleAuthorization_ && !URL_.IsNull(THttpURL::FlagAuth)) + return false; + + return doSetHost(addrs) && doStartRequest(); +} + +/************************************************************/ +bool httpLoadAgent::startRequest(const char* url, + const char* url_to_merge, + bool persistent, + const TAddrList& addrs) { + clearReader(); + + URL_.Clear(); + PersistentConn_ = persistent; + + long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; + if (HandleAuthorization_) + flags |= THttpURL::FeatureAuthSupported; + + if (URL_.Parse(url, flags, url_to_merge) || !URL_.IsValidGlobal()) + return false; + + return doSetHost(addrs) && doStartRequest(); +} + +/************************************************************/ +bool httpLoadAgent::startRequest(const char* url, + const char* url_to_merge, + bool persistent, + ui32 ip) { + clearReader(); + + URL_.Clear(); + PersistentConn_ = persistent; + + long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; + if (HandleAuthorization_) + flags |= THttpURL::FeatureAuthSupported; + + if (URL_.Parse(url, flags, url_to_merge) || !URL_.IsValidGlobal()) + return false; + + return doSetHost(TAddrList::MakeV4Addr(ip, URL_.GetPort())) && doStartRequest(); +} + 
+/************************************************************/ +bool httpLoadAgent::doSetHost(const TAddrList& addrs) { + socketAbstractHandler* h = Factory_.chooseHandler(URL_); + if (!h) + return false; + Socket.setHandler(h); + + if (addrs.size()) { + ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost), + URL_.GetPort(), addrs); + } else { + ErrCode_ = SetHost(URL_.Get(THttpURL::FieldHost), + URL_.GetPort()); + } + if (ErrCode_) + return false; + + if (RealHost_) { + free(Hostheader); + Hostheader = (char*)malloc(strlen(RealHost_) + 20); + sprintf(Hostheader, "Host: %s\r\n", RealHost_); + } + + if (!URL_.IsNull(THttpURL::FlagAuth)) { + if (!HandleAuthorization_) { + ErrCode_ = HTTP_UNAUTHORIZED; + return false; + } + + Digest_.setAuthorization(URL_.Get(THttpURL::FieldUsername), + URL_.Get(THttpURL::FieldPassword)); + } + + return true; +} + +/************************************************************/ +bool httpLoadAgent::setHost(const char* host_url, + const TAddrList& addrs) { + clearReader(); + + URL_.Clear(); + PersistentConn_ = true; + + long flags = THttpURL::FeatureSchemeKnown | THttpURL::FeaturesNormalizeSet; + if (HandleAuthorization_) + flags |= THttpURL::FeatureAuthSupported; + + if (URL_.Parse(host_url, flags) || !URL_.IsValidGlobal()) + return false; + + return doSetHost(addrs); +} + +/************************************************************/ +bool httpLoadAgent::startOneRequest(const char* local_url) { + clearReader(); + + THttpURL lURL; + if (lURL.Parse(local_url, THttpURL::FeaturesNormalizeSet) || lURL.IsValidGlobal()) + return false; + + URL_.SetInMemory(THttpURL::FieldPath, lURL.Get(THttpURL::FieldPath)); + URL_.SetInMemory(THttpURL::FieldQuery, lURL.Get(THttpURL::FieldQuery)); + URL_.Rewrite(); + + return doStartRequest(); +} + +/************************************************************/ +bool httpLoadAgent::doStartRequest() { + TString urlStr = URL_.PrintS(THttpURL::FlagPath | THttpURL::FlagQuery); + if (!urlStr) + urlStr = "/"; + + 
for (int step = 0; step < 10; step++) { + const char* digestHeader = Digest_.getHeaderInstruction(); + + unsigned i = (digestHeader) ? 2 : 1; + const char** headers = + (const char**)(alloca((i + Headers_.size()) * sizeof(char*))); + + for (i = 0; i < Headers_.size(); i++) + headers[i] = Headers_[i].c_str(); + if (digestHeader) + headers[i++] = digestHeader; + headers[i] = nullptr; + + ErrCode_ = RequestGet(urlStr.c_str(), headers, PersistentConn_); + + if (ErrCode_) { + Disconnect(); + return false; + } + + TString urlBaseStr = URL_.PrintS(THttpURL::FlagNoFrag); + + clearReader(); + Reader_ = new httpAgentReader(*this, urlBaseStr.c_str(), + !PersistentConn_, !Digest_.empty()); + + if (Reader_->readHeader()) { + //mReader->getHeader()->Print(); + if (getHeader()->http_status == HTTP_UNAUTHORIZED && + step < 1 && + Digest_.processHeader(getAuthHeader(), + urlStr.c_str(), + "GET")) { + //mReader->skipTheRest(); + delete Reader_; + Reader_ = nullptr; + ErrCode_ = 0; + Disconnect(); + continue; + } + + return true; + } + Disconnect(); + clearReader(); + + return false; + } + + ErrCode_ = HTTP_UNAUTHORIZED; + return false; +} + +/************************************************************/ +/************************************************************/ diff --git a/library/cpp/http/fetch/httpload.h b/library/cpp/http/fetch/httpload.h new file mode 100644 index 0000000000..e22e4b809e --- /dev/null +++ b/library/cpp/http/fetch/httpload.h @@ -0,0 +1,307 @@ +#pragma once + +#include "httpagent.h" +#include "httpparser.h" +#include "http_digest.h" + +#include <util/system/compat.h> +#include <util/string/vector.h> +#include <util/network/ip.h> +#include <library/cpp/uri/http_url.h> +#include <library/cpp/http/misc/httpcodes.h> + +/********************************************************/ +// Section 1: socket handlers +/********************************************************/ +// The following classes allows to adopt template scheme +// THttpAgent for work with socket by 
flexible +// object-style scheme. + +/********************************************************/ +// This class is used as a base one for flexible +// socket handling +class socketAbstractHandler { +public: + virtual bool Good() = 0; + + virtual int Connect(const TAddrList& addrs, TDuration Timeout) = 0; + + virtual void Disconnect() = 0; + + virtual void shutdown() = 0; + + virtual bool send(const char* message, ssize_t messlen) = 0; + + virtual bool peek() = 0; + + virtual ssize_t read(void* buffer, ssize_t buflen) = 0; + + virtual ~socketAbstractHandler() { + } + +protected: + socketAbstractHandler() { + } +}; + +/********************************************************/ +// This class is used as a proxy between THttpAgent and +// socketAbstractHandler +// (it is used by template scheme, +// so it does not have virtual methods) +class TSocketHandlerPtr { +protected: + socketAbstractHandler* Handler_; + +public: + TSocketHandlerPtr() + : Handler_(nullptr) + { + } + + virtual ~TSocketHandlerPtr() { + delete Handler_; + } + + int Good() { + return (Handler_ && Handler_->Good()); + } + + int Connect(const TAddrList& addrs, TDuration Timeout) { + return (Handler_) ? Handler_->Connect(addrs, Timeout) : 1; + } + + void Disconnect() { + if (Handler_) + Handler_->Disconnect(); + } + + void shutdown() { + if (Handler_) + Handler_->shutdown(); + } + + bool send(const char* message, ssize_t messlen) { + return (Handler_) ? Handler_->send(message, messlen) : false; + } + + virtual bool peek() { + return (Handler_) ? Handler_->peek() : false; + } + + virtual ssize_t read(void* buffer, ssize_t buflen) { + return (Handler_) ? 
Handler_->read(buffer, buflen) : 0; + } + + void setHandler(socketAbstractHandler* handler) { + if (Handler_) + delete Handler_; + Handler_ = handler; + } +}; + +/********************************************************/ +// Here is httpAgent that uses socketAbstractHandler class +// ant its derivatives +using httpSpecialAgent = THttpAgent<TSocketHandlerPtr>; + +/********************************************************/ +// Regular handler is used as implementation of +// socketAbstractHandler for work through HTTP protocol +class socketRegularHandler: public socketAbstractHandler { +protected: + TSimpleSocketHandler Socket_; + +public: + socketRegularHandler() + : Socket_() + { + } + + bool Good() override { + return Socket_.Good(); + } + + int Connect(const TAddrList& addrs, TDuration Timeout) override { + return Socket_.Connect(addrs, Timeout); + } + + void Disconnect() override { + Socket_.Disconnect(); + } + + void shutdown() override { + //Do not block writing to socket + //There are servers that works in a bad way with this + //mSocket.shutdown(); + } + + bool send(const char* message, ssize_t messlen) override { + return Socket_.send(message, messlen); + } + + bool peek() override { + return Socket_.peek(); + } + + ssize_t read(void* buffer, ssize_t buflen) override { + return Socket_.read(buffer, buflen); + } +}; + +/********************************************************/ +// The base factory that allows to choose an appropriate +// socketAbstractHandler implementation by url schema + +class socketHandlerFactory { +public: + virtual ~socketHandlerFactory() { + } + + //returns mHandler_HTTP for correct HTTP-based url + virtual socketAbstractHandler* chooseHandler(const THttpURL& url); + + static socketHandlerFactory sInstance; +}; + +/********************************************************/ +// Section 2: the configurates tool to parse an HTTP-response +/********************************************************/ + +class httpAgentReader: public 
THttpParserGeneric<1> { +protected: + THttpAuthHeader Header_; + httpSpecialAgent& Agent_; + + char* Buffer_; + void* BufPtr_; + int BufSize_; + long BufRest_; + + void readBuf(); + + bool step() { + if (BufRest_ == 0) + readBuf(); + if (eof()) + return false; + return true; + } + +public: + httpAgentReader(httpSpecialAgent& agent, + const char* baseUrl, + bool assumeConnectionClosed, + bool use_auth = false, + int bufSize = 0x1000); + + ~httpAgentReader(); + + bool eof() { + return BufRest_ < 0; + } + + int error() { + return Header_.error; + } + + void setError(int errCode) { + Header_.error = errCode; + } + + const THttpAuthHeader* getAuthHeader() { + return &Header_; + } + + const THttpHeader* readHeader(); + long readPortion(void*& buf); + bool skipTheRest(); +}; + +/********************************************************/ +// Section 3: the main class +/********************************************************/ +class httpLoadAgent: public httpSpecialAgent { +protected: + socketHandlerFactory& Factory_; + bool HandleAuthorization_; + THttpURL URL_; + bool PersistentConn_; + httpAgentReader* Reader_; + TVector<TString> Headers_; + int ErrCode_; + char* RealHost_; + httpDigestHandler Digest_; + + void clearReader(); + bool doSetHost(const TAddrList& addrs); + bool doStartRequest(); + +public: + httpLoadAgent(bool handleAuthorization = false, + socketHandlerFactory& factory = socketHandlerFactory::sInstance); + ~httpLoadAgent(); + + void setRealHost(const char* host); + void setIMS(const char* ifModifiedSince); + void addHeaderInstruction(const char* instr); + void dropHeaderInstructions(); + + bool startRequest(const char* url, + const char* url_to_merge = nullptr, + bool persistent = false, + const TAddrList& addrs = TAddrList()); + + // deprecated v4-only + bool startRequest(const char* url, + const char* url_to_merge, + bool persistent, + ui32 ip); + + bool startRequest(const THttpURL& url, + bool persistent = false, + const TAddrList& addrs = TAddrList()); 
+ + bool setHost(const char* host_url, + const TAddrList& addrs = TAddrList()); + + bool startOneRequest(const char* local_url); + + const THttpAuthHeader* getAuthHeader() { + if (Reader_ && Reader_->getAuthHeader()->use_auth) + return Reader_->getAuthHeader(); + return nullptr; + } + + const THttpHeader* getHeader() { + if (Reader_) + return Reader_->getAuthHeader(); + return nullptr; + } + + const THttpURL& getURL() { + return URL_; + } + + bool eof() { + if (Reader_) + return Reader_->eof(); + return true; + } + + int error() { + if (ErrCode_) + return ErrCode_; + if (Reader_) + return Reader_->error(); + return HTTP_BAD_URL; + } + + long readPortion(void*& buf) { + if (Reader_) + return Reader_->readPortion(buf); + return -1; + } +}; + +/********************************************************/ diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h new file mode 100644 index 0000000000..769828e4ae --- /dev/null +++ b/library/cpp/http/fetch/httpparser.h @@ -0,0 +1,372 @@ +#pragma once + +#include "httpfsm.h" +#include "httpheader.h" + +#include <library/cpp/mime/types/mime.h> +#include <util/system/yassert.h> +#include <library/cpp/http/misc/httpcodes.h> + +template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20> +struct TFakeCheck { + bool Check(THttpHeader* /*header*/) { + return false; + } + void CheckDocPart(void* /*buf*/, size_t /*len*/, THttpHeader* /*header*/) { + } //for every part of DocumentBody will be called + void CheckEndDoc(THttpHeader* /*header*/) { + } + size_t GetMaxHeaderSize() { + return headermax; + } + size_t GetMaxBodySize(THttpHeader*) { + return bodymax; + } +}; + +class THttpParserBase { +public: + enum States { + hp_error, + hp_eof, + hp_in_header, + hp_read_alive, + hp_read_closed, + hp_begin_chunk_header, + hp_chunk_header, + hp_read_chunk + }; + + States GetState() { + return State; + } + + void setAssumeConnectionClosed(int value) { + AssumeConnectionClosed = value; + } + + THttpHeader* 
GetHttpHeader() const { + return Header; + } + +protected: + int CheckHeaders() { + if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) { + Header->content_length = 0; + Header->transfer_chunked = 0; + } + if (Header->transfer_chunked < -1) { + Header->error = HTTP_BAD_ENCODING; + return 1; + } else if (Header->transfer_chunked == -1) { + Header->transfer_chunked = 0; + } + if (!Header->transfer_chunked && Header->content_length < -1) { + Header->error = HTTP_BAD_CONTENT_LENGTH; + return 1; + } + if (Header->http_status == HTTP_OK) { + if (Header->compression_method != HTTP_COMPRESSION_UNSET && + Header->compression_method != HTTP_COMPRESSION_IDENTITY && + Header->compression_method != HTTP_COMPRESSION_GZIP && + Header->compression_method != HTTP_COMPRESSION_DEFLATE) + { + Header->error = HTTP_BAD_CONTENT_ENCODING; + return 1; + } + } + if (Header->connection_closed == -1) + Header->connection_closed = (Header->http_minor == 0 || + AssumeConnectionClosed); + if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) { + Header->error = HTTP_LENGTH_UNKNOWN; + return 1; + } + if (Header->http_time < 0) + Header->http_time = 0; + if (Header->mime_type < 0) + Header->mime_type = MIME_UNKNOWN; + return 0; + } + + THttpHeaderParser HeaderParser; + THttpChunkParser ChunkParser; + States State; + long ChunkSize; + THttpHeader* Header; + int AssumeConnectionClosed; + bool HeadRequest; +}; + +template <int isReader, typename TCheck = TFakeCheck<>> +class THttpParserGeneric: public THttpParserBase, public TCheck { +protected: + long ParseGeneric(void*& buf, long& size) { + if (!size) { + switch (State) { + case hp_error: + case hp_eof: + break; + case hp_read_closed: + State = hp_eof; + break; + case hp_in_header: + Header->error = HTTP_HEADER_EOF; + State = hp_error; + break; + case hp_read_alive: + case hp_read_chunk: + if (HeadRequest) + State = hp_eof; + 
else { + Header->error = HTTP_MESSAGE_EOF; + State = hp_error; + } + break; + case hp_begin_chunk_header: + case hp_chunk_header: + if (HeadRequest) + State = hp_eof; + else { + Header->error = HTTP_CHUNK_EOF; + State = hp_error; + } + break; + } + return 0; + } + while (size) { + int ret; + + switch (State) { + case hp_error: + return 0; + + case hp_eof: + return 0; + + case hp_in_header: + if ((ret = HeaderParser.Execute(buf, size)) < 0) { + Header->error = HTTP_BAD_HEADER_STRING; + State = hp_error; + return 0; + } else if (ret == 2) { + Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1); + size -= long(HeaderParser.lastchar - (char*)buf + 1); + buf = HeaderParser.lastchar + 1; + State = CheckHeaders() ? hp_error + : Header->transfer_chunked ? hp_begin_chunk_header + : Header->content_length == 0 ? hp_eof + : Header->content_length > 0 ? hp_read_alive + : hp_read_closed; + if (State == hp_begin_chunk_header) { + // unget \n for chunk reader + buf = (char*)buf - 1; + size++; + } + if (isReader) + return size; + } else { + Header->header_size += size; + size = 0; + } + break; + + case hp_read_alive: + Header->entity_size += size; + if (Header->entity_size >= Header->content_length) { + State = hp_eof; + } + + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_read_closed: + Header->entity_size += size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + break; + + case hp_begin_chunk_header: + ChunkParser.Init(); + State = hp_chunk_header; + [[fallthrough]]; + + case hp_chunk_header: + if ((ret = ChunkParser.Execute(buf, size)) < 0) { + Header->error = i16(ret == -2 ? 
HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK); + State = hp_error; + return 0; + } else if (ret == 2) { + Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1); + size -= long(ChunkParser.lastchar - (char*)buf + 1); + buf = ChunkParser.lastchar + 1; + ChunkSize = ChunkParser.chunk_length; + Y_ASSERT(ChunkSize >= 0); + State = ChunkSize ? hp_read_chunk : hp_eof; + } else { + Header->entity_size += size; + size = 0; + } + break; + + case hp_read_chunk: + if (size >= ChunkSize) { + Header->entity_size += ChunkSize; + State = hp_begin_chunk_header; + TCheck::CheckDocPart(buf, ChunkSize, Header); + if (isReader) + return ChunkSize; + size -= ChunkSize; + buf = (char*)buf + ChunkSize; + } else { + Header->entity_size += size; + ChunkSize -= size; + TCheck::CheckDocPart(buf, size, Header); + if (isReader) + return size; + size = 0; + } + break; + } + } + return size; + } +}; + +template <class TCheck = TFakeCheck<>> +class THttpParser: public THttpParserGeneric<0, TCheck> { + typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG! 
+public: + void Init(THttpHeader* H, bool head_request = false) { + TBaseT::Header = H; + TBaseT::HeaderParser.Init(TBaseT::Header); + TBaseT::State = TBaseT::hp_in_header; + TBaseT::AssumeConnectionClosed = 0; + TBaseT::HeadRequest = head_request; + } + + void Parse(void* buf, long size) { + TBaseT::ParseGeneric(buf, size); + } +}; + +class TMemoReader { +public: + int Init(void* buf, long bufsize) { + Buf = buf; + Bufsize = bufsize; + return 0; + } + long Read(void*& buf) { + Y_ASSERT(Bufsize >= 0); + if (!Bufsize) { + Bufsize = -1; + return 0; + } + buf = Buf; + long ret = Bufsize; + Bufsize = 0; + return ret; + } + +protected: + long Bufsize; + void* Buf; +}; + +template <class Reader> +class THttpReader: public THttpParserGeneric<1>, public Reader { + typedef THttpParserGeneric<1> TBaseT; + +public: + using TBaseT::AssumeConnectionClosed; + using TBaseT::Header; + using TBaseT::ParseGeneric; + using TBaseT::State; + + int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) { + Header = H; + Eoferr = 1; + Size = 0; + AssumeConnectionClosed = assumeConnectionClosed; + HeadRequest = headRequest; + return parsHeader ? 
ParseHeader() : SkipHeader(); + } + + long Read(void*& buf) { + long Chunk; + do { + if (!Size) { + if (Eoferr != 1) + return Eoferr; + else if ((Size = (long)Reader::Read(Ptr)) < 0) { + Header->error = HTTP_CONNECTION_LOST; + return Eoferr = -1; + } + } + Chunk = ParseGeneric(Ptr, Size); + buf = Ptr; + Ptr = (char*)Ptr + Chunk; + Size -= Chunk; + if (State == hp_eof) { + Size = 0; + Eoferr = 0; + } else if (State == hp_error) + return Eoferr = -1; + } while (!Chunk); + return Chunk; + } + +protected: + int ParseHeader() { + HeaderParser.Init(Header); + State = hp_in_header; + while (State == hp_in_header) { + if ((Size = (long)Reader::Read(Ptr)) < 0) + return Eoferr = -1; + ParseGeneric(Ptr, Size); + } + if (State == hp_error) + return Eoferr = -1; + if (State == hp_eof) + Eoferr = 0; + return 0; + } + + int SkipHeader() { + long hdrsize = Header->header_size; + while (hdrsize) { + if ((Size = (long)Reader::Read(Ptr)) <= 0) + return Eoferr = -1; + if (Size >= hdrsize) { + Size -= hdrsize; + Ptr = (char*)Ptr + hdrsize; + break; + } + hdrsize -= Size; + } + State = Header->transfer_chunked ? hp_begin_chunk_header + : Header->content_length == 0 ? hp_eof + : Header->content_length > 0 ? 
hp_read_alive + : hp_read_closed; + Header->entity_size = 0; + if (State == hp_eof) + Eoferr = 0; + else if (State == hp_begin_chunk_header) { + // unget \n for chunk reader + Ptr = (char*)Ptr - 1; + ++Size; + } + return 0; + } + + void* Ptr; + long Size; + int Eoferr; +}; diff --git a/library/cpp/http/fetch/httpparser_ut.cpp b/library/cpp/http/fetch/httpparser_ut.cpp new file mode 100644 index 0000000000..3b3b938e7a --- /dev/null +++ b/library/cpp/http/fetch/httpparser_ut.cpp @@ -0,0 +1,231 @@ +#include "httpparser.h" + +#include <library/cpp/testing/unittest/registar.h> + +#define ENUM_OUT(arg) \ + case type ::arg: { \ + out << #arg; \ + return; \ + } + +template <> +void Out<THttpParserBase::States>(IOutputStream& out, THttpParserBase::States st) { + using type = THttpParserBase::States; + switch (st) { + ENUM_OUT(hp_error) + ENUM_OUT(hp_eof) + ENUM_OUT(hp_in_header) + ENUM_OUT(hp_read_alive) + ENUM_OUT(hp_read_closed) + ENUM_OUT(hp_begin_chunk_header) + ENUM_OUT(hp_chunk_header) + ENUM_OUT(hp_read_chunk) + } +} + +namespace { + class TSomethingLikeFakeCheck; + + using TTestHttpParser = THttpParser<TSomethingLikeFakeCheck>; + + class TSomethingLikeFakeCheck { + TString Body_; + + public: + const TString& Body() const { + return Body_; + } + + // other functions are not really called by THttpParser + void CheckDocPart(const void* buf, size_t len, THttpHeader* /* header */) { + TString s(static_cast<const char*>(buf), len); + Cout << "State = " << static_cast<TTestHttpParser*>(this)->GetState() << ", CheckDocPart(" << s.Quote() << ")\n"; + Body_ += s; + } + }; + +} + +Y_UNIT_TEST_SUITE(TestHttpParser) { + Y_UNIT_TEST(TestTrivialRequest) { + const TString blob{ + "GET /search?q=hi HTTP/1.1\r\n" + "Host: www.google.ru:8080 \r\n" + "\r\n"}; + THttpHeader hdr; + THttpParser<> parser; + parser.Init(&hdr); + parser.Parse((void*)blob.data(), blob.size()); + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_error); // can't parse request as response + } + + // XXX: 
`entity_size` is i32 and `content_length` is i64! + Y_UNIT_TEST(TestTrivialResponse) { + const TString blob{ + "HTTP/1.1 200 Ok\r\n" + "Content-Length: 2\r\n" + "\r\n" + "OK"}; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + parser.Parse((void*)blob.data(), blob.size()); + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_eof); + UNIT_ASSERT_EQUAL(parser.Body(), "OK"); + UNIT_ASSERT_EQUAL(hdr.header_size, strlen( + "HTTP/1.1 200 Ok\r\n" + "Content-Length: 2\r\n" + "\r\n")); + UNIT_ASSERT_EQUAL(hdr.entity_size, strlen("OK")); + } + + // XXX: `entity_size` is off by one in TE:chunked case. + Y_UNIT_TEST(TestChunkedResponse) { + const TString blob{ + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "Ok\r\n" + "8\r\n" + "AllRight\r\n" + "0\r\n" + "\r\n"}; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + parser.Parse((void*)blob.data(), blob.size()); + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_eof); + UNIT_ASSERT_EQUAL(parser.Body(), "OkAllRight"); + UNIT_ASSERT_EQUAL(hdr.header_size, strlen( + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n")); + const int off_by_one_err = -1; // XXX: it really looks so + UNIT_ASSERT_EQUAL(hdr.entity_size + off_by_one_err, strlen( + "2\r\n" + "Ok\r\n" + "8\r\n" + "AllRight\r\n" + "0\r\n" + "\r\n")); + } + + static const TString PipelineClenBlob_{ + "HTTP/1.1 200 Ok\r\n" + "Content-Length: 4\r\n" + "\r\n" + "OK\r\n" + "HTTP/1.1 200 Zz\r\n" + "Content-Length: 4\r\n" + "\r\n" + "ZZ\r\n"}; + + void AssertPipelineClen(TTestHttpParser & parser, const THttpHeader& hdr) { + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_eof); + UNIT_ASSERT_EQUAL(4, hdr.content_length); + UNIT_ASSERT_EQUAL(hdr.header_size, strlen( + "HTTP/1.1 200 Ok\r\n" + "Content-Length: 4\r\n" + "\r\n")); + } + + Y_UNIT_TEST(TestPipelineClenByteByByte) { + const TString& blob = PipelineClenBlob_; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + for (size_t i = 0; i < 
blob.size(); ++i) { + const TStringBuf d{blob, i, 1}; + parser.Parse((void*)d.data(), d.size()); + Cout << TString(d).Quote() << " -> " << parser.GetState() << Endl; + } + AssertPipelineClen(parser, hdr); + UNIT_ASSERT_EQUAL(parser.Body(), "OK\r\n"); + UNIT_ASSERT_EQUAL(hdr.entity_size, hdr.content_length); + } + + // XXX: Content-Length is ignored, Body() looks unexpected! + Y_UNIT_TEST(TestPipelineClenOneChunk) { + const TString& blob = PipelineClenBlob_; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + parser.Parse((void*)blob.data(), blob.size()); + AssertPipelineClen(parser, hdr); + UNIT_ASSERT_EQUAL(parser.Body(), + "OK\r\n" + "HTTP/1.1 200 Zz\r\n" + "Content-Length: 4\r\n" + "\r\n" + "ZZ\r\n"); + UNIT_ASSERT_EQUAL(hdr.entity_size, strlen( + "OK\r\n" + "HTTP/1.1 200 Zz\r\n" + "Content-Length: 4\r\n" + "\r\n" + "ZZ\r\n")); + } + + static const TString PipelineChunkedBlob_{ + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "Ok\r\n" + "8\r\n" + "AllRight\r\n" + "0\r\n" + "\r\n" + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "Yo\r\n" + "8\r\n" + "uWin!Iam\r\n" + "0\r\n" + "\r\n"}; + + void AssertPipelineChunked(TTestHttpParser & parser, const THttpHeader& hdr) { + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_eof); + UNIT_ASSERT_EQUAL(parser.Body(), "OkAllRight"); + UNIT_ASSERT_EQUAL(-1, hdr.content_length); + UNIT_ASSERT_EQUAL(hdr.header_size, strlen( + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n")); + const int off_by_one_err = -1; + UNIT_ASSERT_EQUAL(hdr.entity_size + off_by_one_err, strlen( + "2\r\n" + "Ok\r\n" + "8\r\n" + "AllRight\r\n" + "0\r\n" + "\r\n")); + } + + Y_UNIT_TEST(TestPipelineChunkedByteByByte) { + const TString& blob = PipelineChunkedBlob_; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + for (size_t i = 0; i < blob.size(); ++i) { + const TStringBuf d{blob, i, 1}; + parser.Parse((void*)d.data(), d.size()); + Cout << 
TString(d).Quote() << " -> " << parser.GetState() << Endl; + if (blob.size() / 2 - 1 <= i) // last \n sets EOF + UNIT_ASSERT_EQUAL(parser.GetState(), parser.hp_eof); + } + AssertPipelineChunked(parser, hdr); + } + + Y_UNIT_TEST(TestPipelineChunkedOneChunk) { + const TString& blob = PipelineChunkedBlob_; + THttpHeader hdr; + TTestHttpParser parser; + parser.Init(&hdr); + parser.Parse((void*)blob.data(), blob.size()); + AssertPipelineChunked(parser, hdr); + } +} diff --git a/library/cpp/http/fetch/httpzreader.h b/library/cpp/http/fetch/httpzreader.h new file mode 100644 index 0000000000..68eb00853d --- /dev/null +++ b/library/cpp/http/fetch/httpzreader.h @@ -0,0 +1,295 @@ +#pragma once + +#include "httpheader.h" +#include "httpparser.h" +#include "exthttpcodes.h" + +#include <util/system/defaults.h> +#include <util/generic/yexception.h> + +#include <contrib/libs/zlib/zlib.h> + +#include <errno.h> + +#ifndef ENOTSUP +#define ENOTSUP 45 +#endif + +template <class Reader> +class TCompressedHttpReader: public THttpReader<Reader> { + typedef THttpReader<Reader> TBase; + +public: + using TBase::AssumeConnectionClosed; + using TBase::Header; + using TBase::ParseGeneric; + using TBase::State; + + static constexpr size_t DefaultBufSize = 64 << 10; + static constexpr unsigned int DefaultWinSize = 15; + + TCompressedHttpReader() + : CompressedInput(false) + , BufSize(0) + , CurContSize(0) + , MaxContSize(0) + , Buf(nullptr) + , ZErr(0) + , ConnectionClosed(0) + , IgnoreTrailingGarbage(true) + { + memset(&Stream, 0, sizeof(Stream)); + } + + ~TCompressedHttpReader() { + ClearStream(); + + if (Buf) { + free(Buf); + Buf = nullptr; + } + } + + void SetConnectionClosed(int cc) { + ConnectionClosed = cc; + } + + void SetIgnoreTrailingGarbage(bool ignore) { + IgnoreTrailingGarbage = ignore; + } + + int Init( + THttpHeader* H, + int parsHeader, + const size_t maxContSize = Max<size_t>(), + const size_t bufSize = DefaultBufSize, + const unsigned int winSize = DefaultWinSize, + bool 
headRequest = false) + { + ZErr = 0; + CurContSize = 0; + MaxContSize = maxContSize; + + int ret = TBase::Init(H, parsHeader, ConnectionClosed, headRequest); + if (ret) + return ret; + + ret = SetCompression(H->compression_method, bufSize, winSize); + return ret; + } + + long Read(void*& buf) { + if (!CompressedInput) { + long res = TBase::Read(buf); + if (res > 0) { + CurContSize += (size_t)res; + if (CurContSize > MaxContSize) { + ZErr = E2BIG; + return -1; + } + } + return res; + } + + while (true) { + if (Stream.avail_in == 0) { + void* tmpin = Stream.next_in; + long res = TBase::Read(tmpin); + Stream.next_in = (Bytef*)tmpin; + if (res <= 0) + return res; + Stream.avail_in = (uInt)res; + } + + Stream.next_out = Buf; + Stream.avail_out = (uInt)BufSize; + buf = Buf; + + int err = inflate(&Stream, Z_SYNC_FLUSH); + + //Y_ASSERT(Stream.avail_in == 0); + + switch (err) { + case Z_OK: + // there is no data in next_out yet + if (BufSize == Stream.avail_out) + continue; + [[fallthrough]]; // don't break or return; continue with Z_STREAM_END case + + case Z_STREAM_END: + if (Stream.total_out > MaxContSize) { + ZErr = E2BIG; + return -1; + } + if (!IgnoreTrailingGarbage && BufSize == Stream.avail_out && Stream.avail_in > 0) { + Header->error = EXT_HTTP_GZIPERROR; + ZErr = EFAULT; + Stream.msg = (char*)"trailing garbage"; + return -1; + } + return long(BufSize - Stream.avail_out); + + case Z_NEED_DICT: + case Z_DATA_ERROR: + Header->error = EXT_HTTP_GZIPERROR; + ZErr = EFAULT; + return -1; + + case Z_MEM_ERROR: + ZErr = ENOMEM; + return -1; + + default: + ZErr = EINVAL; + return -1; + } + } + + return -1; + } + + const char* ZMsg() const { + return Stream.msg; + } + + int ZError() const { + return ZErr; + } + + size_t GetCurContSize() const { + return CompressedInput ? 
Stream.total_out : CurContSize; + } + +protected: + int SetCompression(const int compression, const size_t bufSize, + const unsigned int winSize) { + ClearStream(); + + int winsize = winSize; + switch ((enum HTTP_COMPRESSION)compression) { + case HTTP_COMPRESSION_UNSET: + case HTTP_COMPRESSION_IDENTITY: + CompressedInput = false; + return 0; + case HTTP_COMPRESSION_GZIP: + CompressedInput = true; + winsize += 16; // 16 indicates gzip, see zlib.h + break; + case HTTP_COMPRESSION_DEFLATE: + CompressedInput = true; + winsize = -winsize; // negative indicates raw deflate stream, see zlib.h + break; + case HTTP_COMPRESSION_COMPRESS: + case HTTP_COMPRESSION_ERROR: + default: + CompressedInput = false; + ZErr = ENOTSUP; + return -1; + } + + if (bufSize != BufSize) { + if (Buf) + free(Buf); + Buf = (ui8*)malloc(bufSize); + if (!Buf) { + ZErr = ENOMEM; + return -1; + } + BufSize = bufSize; + } + + int err = inflateInit2(&Stream, winsize); + switch (err) { + case Z_OK: + Stream.total_in = 0; + Stream.total_out = 0; + Stream.avail_in = 0; + return 0; + + case Z_DATA_ERROR: // never happens, see zlib.h + CompressedInput = false; + ZErr = EFAULT; + return -1; + + case Z_MEM_ERROR: + CompressedInput = false; + ZErr = ENOMEM; + return -1; + + default: + CompressedInput = false; + ZErr = EINVAL; + return -1; + } + } + + void ClearStream() { + if (CompressedInput) { + inflateEnd(&Stream); + CompressedInput = false; + } + } + + z_stream Stream; + bool CompressedInput; + size_t BufSize; + size_t CurContSize, MaxContSize; + ui8* Buf; + int ZErr; + int ConnectionClosed; + bool IgnoreTrailingGarbage; +}; + +class zlib_exception: public yexception { +}; + +template <class Reader> +class SCompressedHttpReader: public TCompressedHttpReader<Reader> { + typedef TCompressedHttpReader<Reader> TBase; + +public: + using TBase::ZError; + using TBase::ZMsg; + + SCompressedHttpReader() + : TBase() + { + } + + int Init( + THttpHeader* H, + int parsHeader, + const size_t maxContSize = Max<size_t>(), 
+ const size_t bufSize = TBase::DefaultBufSize, + const unsigned int winSize = TBase::DefaultWinSize, + bool headRequest = false) + { + int ret = TBase::Init(H, parsHeader, maxContSize, bufSize, winSize, headRequest); + return (int)HandleRetValue((long)ret); + } + + long Read(void*& buf) { + long ret = TBase::Read(buf); + return HandleRetValue(ret); + } + +protected: + long HandleRetValue(long ret) { + switch (ZError()) { + case 0: + return ret; + case ENOMEM: + ythrow yexception() << "SCompressedHttpReader: not enough memory"; + case EINVAL: + ythrow yexception() << "SCompressedHttpReader: zlib error: " << ZMsg(); + case ENOTSUP: + ythrow yexception() << "SCompressedHttpReader: unsupported compression method"; + case EFAULT: + ythrow zlib_exception() << "SCompressedHttpReader: " << ZMsg(); + case E2BIG: + ythrow zlib_exception() << "SCompressedHttpReader: Content exceeds maximum length"; + default: + ythrow yexception() << "SCompressedHttpReader: unknown error"; + } + } +}; diff --git a/library/cpp/http/fetch/library-htfetch_ut_hreflang_in.h b/library/cpp/http/fetch/library-htfetch_ut_hreflang_in.h new file mode 100644 index 0000000000..0df89bdc79 --- /dev/null +++ b/library/cpp/http/fetch/library-htfetch_ut_hreflang_in.h @@ -0,0 +1,155 @@ +#pragma once + +char hreflang_ut_in[] = "HTTP/1.1 200 OK\n" + "Date: Thu, 15 Nov 2012 22:38:28 GMT\n" + "Server: Apache/2\n" + "X-Powered-By: PHP/5.2.17\n" + "Set-Cookie: PHPSESSID=6d69474d1cc019d7d82714c9472bc6d6; path=/\n" + "Expires: Thu, 19 Nov 1981 08:52:00 GMT\n" + "Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0\n" + "Pragma: no-cache\n" + "Link: <http://www.forexticket.cn.com/zh/currency/converter-EEK-XAG>; rel='alternate'; hreflang='zh-CN'\n" + "Link: <http://www.forexticket.tw/zh/currency/converter-EEK-XAG>; rel='alternate'; hreflang='zh-TW'\n" + "Link: <http://www.forexticket.hk/zh/currency/converter-EEK-XAG>; rel='alternate'; hreflang='zh-HK'\n" + "Link: 
<http://www.forexticket.sg/zh/currency/converter-EEK-XAG>; rel='alternate'; hreflang='zh-SG'\n" + "Link: <http://www.forexticket.in/hi/currency/converter-EEK-XAG>; rel='alternate'; hreflang='hi-IN'\n" + "Link: <http://www.forexticket.com.fj/hi/currency/converter-EEK-XAG>; rel='alternate'; hreflang='hi-FJ'\n" + "Link: <http://www.forexticket.in/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-IN'\n" + "Link: <http://www.forexticket.us/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-US'\n" + "Link: <http://www.forexticket.com.pk/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-PK'\n" + "Link: <http://www.forexticket-bd.com/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-BD'\n" + "Link: <http://www.forexticket-ng.com/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-NG'\n" + "Link: <http://www.forexticket.co.uk/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-GB'\n" + "Link: <http://www.forexticket.co.za/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-ZA'\n" + "Link: <http://www.forexticket.co.ke/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-KE'\n" + "Link: <http://www.forexticket.com/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-CA'\n" + "Link: <http://www.forexticket-gh.com/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-GH'\n" + "Link: <http://www.forexticket.biz/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-AU'\n" + "Link: <http://www.forexticket.cm/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-CM'\n" + "Link: <http://www.forexticket-kh.com/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-KH'\n" + "Link: <http://www.forexticket.hk/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-HK'\n" + "Link: <http://www.forexticket.la/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-LA'\n" + "Link: <http://www.forexticket.sg/en/currency/converter-EEK-XAG>; 
rel='alternate'; hreflang='en-SG'\n" + "Link: <http://www.forexticket.co.nz/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-NZ'\n" + "Link: <http://www.forexticket.com.pr/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-PR'\n" + "Link: <http://www.forexticket.com.fj/en/currency/converter-EEK-XAG>; rel='alternate'; hreflang='en-FJ'\n" + "Link: <http://www.forexticket.us/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-US'\n" + "Link: <http://www.forexticket.mx/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-MX'\n" + "Link: <http://www.forexticket.co/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-CO'\n" + "Link: <http://www.forexticket.com.ar/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-AR'\n" + "Link: <http://www.forexticket-pe.com/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-PE'\n" + "Link: <http://www.forexticket.co.ve/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-VE'\n" + "Link: <http://www.forexticket.cl/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-CL'\n" + "Link: <http://www.forexticket.ec/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-EC'\n" + "Link: <http://www.forexticket.com.gt/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-GT'\n" + "Link: <http://www.forexticket.bo/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-BO'\n" + "Link: <http://www.forexticket.hn/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-HN'\n" + "Link: <http://www.forexticket.com.py/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-PY'\n" + "Link: <http://www.forexticket.es/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-ES'\n" + "Link: <http://www.forexticket.com.sv/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-SV'\n" + "Link: <http://www.forexticket.com.ni/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-NI'\n" + "Link: <http://www.forexticket.co.cr/es/cambio/divisas-EEK-XAG>; rel='alternate'; 
hreflang='es-CR'\n" + "Link: <http://www.forexticket.com.pr/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-PR'\n" + "Link: <http://www.forexticket.com.uy/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-UY'\n" + "Link: <http://www.forexticket.com.pa/es/cambio/divisas-EEK-XAG>; rel='alternate'; hreflang='es-PA'\n" + "Link: <http://www.forexticket.asia.com/id/currency/converter-EEK-XAG>; rel='alternate'; hreflang='id-ID'\n" + "Link: <http://www.forexticket.com.br/pt/moeda/conversor-EEK-XAG>; rel='alternate'; hreflang='pt-BR'\n" + "Link: <http://www.forexticket-mz.com/pt/moeda/conversor-EEK-XAG>; rel='alternate'; hreflang='pt-MZ'\n" + "Link: <http://www.forexticket.com.pt/pt/moeda/conversor-EEK-XAG>; rel='alternate'; hreflang='pt-PT'\n" + "Link: <http://www.forexticket.tl/pt/moeda/conversor-EEK-XAG>; rel='alternate'; hreflang='pt-TL'\n" + "Link: <http://www.forexticket.ru/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-RU'\n" + "Link: <http://www.forexticket-kz.com/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-KZ'\n" + "Link: <http://www.forexticket-tj.com/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-TJ'\n" + "Link: <http://www.forexticket-kg.com/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-KG'\n" + "Link: <http://www.forexticket-ge.com/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-GE'\n" + "Link: <http://www.forexticket.mn/ru/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ru-MN'\n" + "Link: <http://www.forexticket.jp/ja/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ja-JP'\n" + "Link: <http://www.forexticket-ph.com/tl/currency/converter-EEK-XAG>; rel='alternate'; hreflang='tl-PH'\n" + "Link: <http://www.forexticket.vn/vi/currency/converter-EEK-XAG>; rel='alternate'; hreflang='vi-VN'\n" + "Link: <http://www.forexticket.de/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-DE'\n" + "Link: 
<http://www.forexticket.be/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-BE'\n" + "Link: <http://www.forexticket.at/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-AT'\n" + "Link: <http://www.forexticket.ch/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-CH'\n" + "Link: <http://www.forexticket.lu/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-LU'\n" + "Link: <http://www.forexticket.li/de/waehrungsumrechner/devisen-EEK-XAG>; rel='alternate'; hreflang='de-LI'\n" + "Link: <http://www.forexticket.de/de/waehrungsumrechner/devisen-EEK-XAG>; rel='canonical'\n" + "Link: <http://www.forexticket-eg.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-EG'\n" + "Link: <http://www.forexticket-dz.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-DZ'\n" + "Link: <http://www.forexticket-ma.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-MA'\n" + "Link: <http://www.forexticket-iq.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-IQ'\n" + "Link: <http://www.forexticket-sa.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-SA'\n" + "Link: <http://www.forexticket-sy.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-SY'\n" + "Link: <http://www.forexticket-tn.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-TN'\n" + "Link: <http://www.forexticket-td.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-TD'\n" + "Link: <http://www.forexticket-so.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-SO'\n" + "Link: <http://www.forexticket.co.il/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-IL'\n" + "Link: <http://www.forexticket-jo.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-JO'\n" + "Link: <http://www.forexticket.ae/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-AE'\n" + "Link: 
<http://www.forexticket-lb.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-LB'\n" + "Link: <http://www.forexticket-om.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-OM'\n" + "Link: <http://www.forexticket-kw.com/ar/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ar-KW'\n" + "Link: <http://www.forexticket-tr.com/tr/currency/converter-EEK-XAG>; rel='alternate'; hreflang='tr-TR'\n" + "Link: <http://www.forexticket-bg.com/tr/currency/converter-EEK-XAG>; rel='alternate'; hreflang='tr-BG'\n" + "Link: <http://www.forexticket-cy.com/tr/currency/converter-EEK-XAG>; rel='alternate'; hreflang='tr-CY'\n" + "Link: <http://www.forexticket.ir/fa/currency/converter-EEK-XAG>; rel='alternate'; hreflang='fa-IR'\n" + "Link: <http://www.forexticket.af/fa/currency/converter-EEK-XAG>; rel='alternate'; hreflang='fa-AF'\n" + "Link: <http://www.forexticket.cd/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-CD'\n" + "Link: <http://www.forexticket.fr/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-FR'\n" + "Link: <http://www.forexticket.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-CA'\n" + "Link: <http://www.forexticket.mg/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-MG'\n" + "Link: <http://www.forexticket.cm/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-CM'\n" + "Link: <http://www.forexticket-kh.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-KH'\n" + "Link: <http://www.forexticket-ml.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-ML'\n" + "Link: <http://www.forexticket-sn.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-SN'\n" + "Link: <http://www.forexticket-tn.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-TN'\n" + "Link: <http://www.forexticket-td.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-TD'\n" + "Link: <http://www.forexticket.be/fr/conversion/monnaie-EEK-XAG>; 
rel='alternate'; hreflang='fr-BE'\n" + "Link: <http://www.forexticket-gn.com/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-GN'\n" + "Link: <http://www.forexticket.ht/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-HT'\n" + "Link: <http://www.forexticket.ch/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-CH'\n" + "Link: <http://www.forexticket.la/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-LA'\n" + "Link: <http://www.forexticket.lu/fr/conversion/monnaie-EEK-XAG>; rel='alternate'; hreflang='fr-LU'\n" + "Link: <http://www.forexticket-th.com/th/currency/converter-EEK-XAG>; rel='alternate'; hreflang='th-TH'\n" + "Link: <http://www.forexticket.co.uk/cy/currency/converter-EEK-XAG>; rel='alternate'; hreflang='cy-GB'\n" + "Link: <http://www.forexticket.co.uk/ga/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ga-GB'\n" + "Link: <http://www.forexticket.it/it/convertitore/valuta-EEK-XAG>; rel='alternate'; hreflang='it-IT'\n" + "Link: <http://www.forexticket.ch/it/convertitore/valuta-EEK-XAG>; rel='alternate'; hreflang='it-CH'\n" + "Link: <http://www.forexticket.co.za/af/currency/converter-EEK-XAG>; rel='alternate'; hreflang='af-ZA'\n" + "Link: <http://www.forexticket.kr/ko/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ko-KR'\n" + "Link: <http://www.forexticket-ua.com/uk/currency/converter-EEK-XAG>; rel='alternate'; hreflang='uk-UA'\n" + "Link: <http://www.forexticket-tz.com/sw/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sw-TZ'\n" + "Link: <http://www.forexticket.co.ke/sw/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sw-KE'\n" + "Link: <http://www.forexticket.pl/pl/currency/converter-EEK-XAG>; rel='alternate'; hreflang='pl-PL'\n" + "Link: <http://www.forexticket.com.my/ms/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ms-MY'\n" + "Link: <http://www.forexticket.sg/ms/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ms-SG'\n" + "Link: 
<http://www.forexticket.ro/ro/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ro-RO'\n" + "Link: <http://www.forexticket.nl/nl/currency/converter-EEK-XAG>; rel='alternate'; hreflang='nl-NL'\n" + "Link: <http://www.forexticket.be/nl/currency/converter-EEK-XAG>; rel='alternate'; hreflang='nl-BE'\n" + "Link: <http://www.forexticket.gr/el/currency/converter-EEK-XAG>; rel='alternate'; hreflang='el-GR'\n" + "Link: <http://www.forexticket-al.com/el/currency/converter-EEK-XAG>; rel='alternate'; hreflang='el-AL'\n" + "Link: <http://www.forexticket-cy.com/el/currency/converter-EEK-XAG>; rel='alternate'; hreflang='el-CY'\n" + "Link: <http://www.forexticket.cz/cs/currency/converter-EEK-XAG>; rel='alternate'; hreflang='cs-CZ'\n" + "Link: <http://www.forexticket.hu/hu/currency/converter-EEK-XAG>; rel='alternate'; hreflang='hu-HU'\n" + "Link: <http://www.forexticket.se/sv/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sv-SE'\n" + "Link: <http://www.forexticket.eu/sv/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sv-FI'\n" + "Link: <http://www.forexticket.co.il/iw/currency/converter-EEK-XAG>; rel='alternate'; hreflang='iw-IL'\n" + "Link: <http://www.forexticket.co.il/yi/currency/converter-EEK-XAG>; rel='alternate'; hreflang='yi-IL'\n" + "Link: <http://www.forexticket-bg.com/bg/currency/converter-EEK-XAG>; rel='alternate'; hreflang='bg-BG'\n" + "Link: <http://www.forexticket.es/ca/currency/converter-EEK-XAG>; rel='alternate'; hreflang='ca-ES'\n" + "Link: <http://www.forexticket.es/gl/currency/converter-EEK-XAG>; rel='alternate'; hreflang='gl-ES'\n" + "Link: <http://www.forexticket.dk/da/currency/converter-EEK-XAG>; rel='alternate'; hreflang='da-DK'\n" + "Link: <http://www.forexticket.eu/fi/currency/converter-EEK-XAG>; rel='alternate'; hreflang='fi-FI'\n" + "Link: <http://www.forexticket-hr.com/hr/currency/converter-EEK-XAG>; rel='alternate'; hreflang='hr-HR'\n" + "Link: <http://www.forexticket-hr.com/sr/currency/converter-EEK-XAG>; rel='alternate'; 
hreflang='sr-HR'\n" + "Link: <http://www.forexticket.me/sr/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sr-ME'\n" + "Link: <http://www.forexticket.lt/lt/currency/converter-EEK-XAG>; rel='alternate'; hreflang='lt-LT'\n" + "Link: <http://www.forexticket-al.com/sq/currency/converter-EEK-XAG>; rel='alternate'; hreflang='sq-AL'\n" + "Link: <http://www.forexticket.lv/lv/currency/converter-EEK-XAG>; rel='alternate'; hreflang='lv-LV'\n" + "Link: <http://www.forexticket.co.ee/et/currency/converter-EEK-XAG>; rel='alternate'; hreflang='et-EE'\n" + "Vary: Accept-Encoding,User-Agent\n" + "Content-Encoding: gzip\n" + "Keep-Alive: timeout=1, max=100\n" + "Connection: Keep-Alive\n" + "Transfer-Encoding: chunked\n" + "Content-Type: text/html\n" + "\n"; diff --git a/library/cpp/http/fetch/library-htfetch_ut_hreflang_out.h b/library/cpp/http/fetch/library-htfetch_ut_hreflang_out.h new file mode 100644 index 0000000000..bef8bacff5 --- /dev/null +++ b/library/cpp/http/fetch/library-htfetch_ut_hreflang_out.h @@ -0,0 +1,3 @@ +#pragma once + +char hreflang_ut_out[] = "zh-CN http://www.forexticket.cn.com/zh/currency/converter-EEK-XAG\tzh-TW http://www.forexticket.tw/zh/currency/converter-EEK-XAG\tzh-HK http://www.forexticket.hk/zh/currency/converter-EEK-XAG\tzh-SG http://www.forexticket.sg/zh/currency/converter-EEK-XAG\thi-IN http://www.forexticket.in/hi/currency/converter-EEK-XAG\thi-FJ http://www.forexticket.com.fj/hi/currency/converter-EEK-XAG\ten-IN http://www.forexticket.in/en/currency/converter-EEK-XAG\ten-US http://www.forexticket.us/en/currency/converter-EEK-XAG\ten-PK http://www.forexticket.com.pk/en/currency/converter-EEK-XAG\ten-BD http://www.forexticket-bd.com/en/currency/converter-EEK-XAG\ten-NG http://www.forexticket-ng.com/en/currency/converter-EEK-XAG\ten-GB http://www.forexticket.co.uk/en/currency/converter-EEK-XAG\ten-ZA http://www.forexticket.co.za/en/currency/converter-EEK-XAG\ten-KE http://www.forexticket.co.ke/en/currency/converter-EEK-XAG\ten-CA 
http://www.forexticket.com/en/currency/converter-EEK-XAG\ten-GH http://www.forexticket-gh.com/en/currency/converter-EEK-XAG\ten-AU http://www.forexticket.biz/en/currency/converter-EEK-XAG\ten-CM http://www.forexticket.cm/en/currency/converter-EEK-XAG\ten-KH http://www.forexticket-kh.com/en/currency/converter-EEK-XAG\ten-HK http://www.forexticket.hk/en/currency/converter-EEK-XAG\ten-LA http://www.forexticket.la/en/currency/converter-EEK-XAG\ten-SG http://www.forexticket.sg/en/currency/converter-EEK-XAG\ten-NZ http://www.forexticket.co.nz/en/currency/converter-EEK-XAG\ten-PR http://www.forexticket.com.pr/en/currency/converter-EEK-XAG\ten-FJ http://www.forexticket.com.fj/en/currency/converter-EEK-XAG\tes-US http://www.forexticket.us/es/cambio/divisas-EEK-XAG\tes-MX http://www.forexticket.mx/es/cambio/divisas-EEK-XAG\tes-CO http://www.forexticket.co/es/cambio/divisas-EEK-XAG\tes-AR http://www.forexticket.com.ar/es/cambio/divisas-EEK-XAG\tes-PE http://www.forexticket-pe.com/es/cambio/divisas-EEK-XAG\tes-VE http://www.forexticket.co.ve/es/cambio/divisas-EEK-XAG\tes-CL http://www.forexticket.cl/es/cambio/divisas-EEK-XAG\tes-EC http://www.forexticket.ec/es/cambio/divisas-EEK-XAG\tes-GT http://www.forexticket.com.gt/es/cambio/divisas-EEK-XAG\tes-BO http://www.forexticket.bo/es/cambio/divisas-EEK-XAG\tes-HN http://www.forexticket.hn/es/cambio/divisas-EEK-XAG\tes-PY http://www.forexticket.com.py/es/cambio/divisas-EEK-XAG\tes-ES http://www.forexticket.es/es/cambio/divisas-EEK-XAG\tes-SV http://www.forexticket.com.sv/es/cambio/divisas-EEK-XAG\tes-NI http://www.forexticket.com.ni/es/cambio/divisas-EEK-XAG\tes-CR http://www.forexticket.co.cr/es/cambio/divisas-EEK-XAG\tes-PR http://www.forexticket.com.pr/es/cambio/divisas-EEK-XAG\tes-UY http://www.forexticket.com.uy/es/cambio/divisas-EEK-XAG\tes-PA http://www.forexticket.com.pa/es/cambio/divisas-EEK-XAG\tid-ID http://www.forexticket.asia.com/id/currency/converter-EEK-XAG\tpt-BR 
http://www.forexticket.com.br/pt/moeda/conversor-EEK-XAG\tpt-MZ http://www.forexticket-mz.com/pt/moeda/conversor-EEK-XAG\tpt-PT http://www.forexticket.com.pt/pt/moeda/conversor-EEK-XAG\tpt-TL http://www.forexticket.tl/pt/moeda/conversor-EEK-XAG\tru-RU http://www.forexticket.ru/ru/currency/converter-EEK-XAG\tru-KZ http://www.forexticket-kz.com/ru/currency/converter-EEK-XAG\tru-TJ http://www.forexticket-tj.com/ru/currency/converter-EEK-XAG\tru-KG http://www.forexticket-kg.com/ru/currency/converter-EEK-XAG\tru-GE http://www.forexticket-ge.com/ru/currency/converter-EEK-XAG\tru-MN http://www.forexticket.mn/ru/currency/converter-EEK-XAG\tja-JP http://www.forexticket.jp/ja/currency/converter-EEK-XAG\ttl-PH http://www.forexticket-ph.com/tl/currency/converter-EEK-XAG\tvi-VN http://www.forexticket.vn/vi/currency/converter-EEK-XAG\tde-DE http://www.forexticket.de/de/waehrungsumrechner/devisen-EEK-XAG\tde-BE http://www.forexticket.be/de/waehrungsumrechner/devisen-EEK-XAG\tde-AT http://www.forexticket.at/de/waehrungsumrechner/devisen-EEK-XAG\tde-CH http://www.forexticket.ch/de/waehrungsumrechner/devisen-EEK-XAG\tde-LU http://www.forexticket.lu/de/waehrungsumrechner/devisen-EEK-XAG\tde-LI http://www.forexticket.li/de/waehrungsumrechner/devisen-EEK-XAG\tar-EG http://www.forexticket-eg.com/ar/currency/converter-EEK-XAG\tar-DZ http://www.forexticket-dz.com/ar/currency/converter-EEK-XAG\tar-MA http://www.forexticket-ma.com/ar/currency/converter-EEK-XAG\tar-IQ http://www.forexticket-iq.com/ar/currency/converter-EEK-XAG\tar-SA http://www.forexticket-sa.com/ar/currency/converter-EEK-XAG\tar-SY http://www.forexticket-sy.com/ar/currency/converter-EEK-XAG\tar-TN http://www.forexticket-tn.com/ar/currency/converter-EEK-XAG\tar-TD http://www.forexticket-td.com/ar/currency/converter-EEK-XAG\tar-SO http://www.forexticket-so.com/ar/currency/converter-EEK-XAG\tar-IL http://www.forexticket.co.il/ar/currency/converter-EEK-XAG\tar-JO 
http://www.forexticket-jo.com/ar/currency/converter-EEK-XAG\tar-AE http://www.forexticket.ae/ar/currency/converter-EEK-XAG\tar-LB http://www.forexticket-lb.com/ar/currency/converter-EEK-XAG\tar-OM http://www.forexticket-om.com/ar/currency/converter-EEK-XAG\tar-KW http://www.forexticket-kw.com/ar/currency/converter-EEK-XAG\ttr-TR http://www.forexticket-tr.com/tr/currency/converter-EEK-XAG\ttr-BG http://www.forexticket-bg.com/tr/currency/converter-EEK-XAG\ttr-CY http://www.forexticket-cy.com/tr/currency/converter-EEK-XAG\tfa-IR http://www.forexticket.ir/fa/currency/converter-EEK-XAG\tfa-AF http://www.forexticket.af/fa/currency/converter-EEK-XAG\tfr-CD http://www.forexticket.cd/fr/conversion/monnaie-EEK-XAG\tfr-FR http://www.forexticket.fr/fr/conversion/monnaie-EEK-XAG\tfr-CA http://www.forexticket.com/fr/conversion/monnaie-EEK-XAG\tfr-MG http://www.forexticket.mg/fr/conversion/monnaie-EEK-XAG\tfr-CM http://www.forexticket.cm/fr/conversion/monnaie-EEK-XAG\tfr-KH http://www.forexticket-kh.com/fr/conversion/monnaie-EEK-XAG\tfr-ML http://www.forexticket-ml.com/fr/conversion/monnaie-EEK-XAG\tfr-SN http://www.forexticket-sn.com/fr/conversion/monnaie-EEK-XAG\tfr-TN http://www.forexticket-tn.com/fr/conversion/monnaie-EEK-XAG\tfr-TD http://www.forexticket-td.com/fr/conversion/monnaie-EEK-XAG\tfr-BE http://www.forexticket.be/fr/conversion/monnaie-EEK-XAG\tfr-GN http://www.forexticket-gn.com/fr/conversion/monnaie-EEK-XAG\tfr-HT http://www.forexticket.ht/fr/conversion/monnaie-EEK-XAG\tfr-CH http://www.forexticket.ch/fr/conversion/monnaie-EEK-XAG\tfr-LA http://www.forexticket.la/fr/conversion/monnaie-EEK-XAG\tfr-LU http://www.forexticket.lu/fr/conversion/monnaie-EEK-XAG\tth-TH http://www.forexticket-th.com/th/currency/converter-EEK-XAG\tcy-GB http://www.forexticket.co.uk/cy/currency/converter-EEK-XAG\tga-GB http://www.forexticket.co.uk/ga/currency/converter-EEK-XAG\tit-IT http://www.forexticket.it/it/convertitore/valuta-EEK-XAG\tit-CH 
http://www.forexticket.ch/it/convertitore/valuta-EEK-XAG\taf-ZA http://www.forexticket.co.za/af/currency/converter-EEK-XAG\tko-KR http://www.forexticket.kr/ko/currency/converter-EEK-XAG\tuk-UA http://www.forexticket-ua.com/uk/currency/converter-EEK-XAG\tsw-TZ http://www.forexticket-tz.com/sw/currency/converter-EEK-XAG\tsw-KE http://www.forexticket.co.ke/sw/currency/converter-EEK-XAG\tpl-PL http://www.forexticket.pl/pl/currency/converter-EEK-XAG\tms-MY http://www.forexticket.com.my/ms/currency/converter-EEK-XAG\tms-SG http://www.forexticket.sg/ms/currency/converter-EEK-XAG\tro-RO http://www.forexticket.ro/ro/currency/converter-EEK-XAG\tnl-NL http://www.forexticket.nl/nl/currency/converter-EEK-XAG\tnl-BE http://www.forexticket.be/nl/currency/converter-EEK-XAG\tel-GR http://www.forexticket.gr/el/currency/converter-EEK-XAG\tel-AL http://www.forexticket-al.com/el/currency/converter-EEK-XAG\tel-CY http://www.forexticket-cy.com/el/currency/converter-EEK-XAG\tcs-CZ http://www.forexticket.cz/cs/currency/converter-EEK-XAG\thu-HU http://www.forexticket.hu/hu/currency/converter-EEK-XAG\tsv-SE http://www.forexticket.se/sv/currency/converter-EEK-XAG\tsv-FI http://www.forexticket.eu/sv/currency/converter-EEK-XAG\tiw-IL http://www.forexticket.co.il/iw/currency/converter-EEK-XAG\tyi-IL http://www.forexticket.co.il/yi/currency/converter-EEK-XAG\tbg-BG http://www.forexticket-bg.com/bg/currency/converter-EEK-XAG\tca-ES http://www.forexticket.es/ca/currency/converter-EEK-XAG\tgl-ES http://www.forexticket.es/gl/currency/converter-EEK-XAG\tda-DK http://www.forexticket.dk/da/currency/converter-EEK-XAG\tfi-FI http://www.forexticket.eu/fi/currency/converter-EEK-XAG\thr-HR http://www.forexticket-hr.com/hr/currency/converter-EEK-XAG\tsr-HR http://www.forexticket-hr.com/sr/currency/converter-EEK-XAG\tsr-ME http://www.forexticket.me/sr/currency/converter-EEK-XAG\tlt-LT http://www.forexticket.lt/lt/currency/converter-EEK-XAG\tsq-AL 
http://www.forexticket-al.com/sq/currency/converter-EEK-XAG\tlv-LV http://www.forexticket.lv/lv/currency/converter-EEK-XAG\tet-EE http://www.forexticket.co.ee/et/currency/converter-EEK-XAG"; diff --git a/library/cpp/http/fetch/sockhandler.h b/library/cpp/http/fetch/sockhandler.h new file mode 100644 index 0000000000..e18149f657 --- /dev/null +++ b/library/cpp/http/fetch/sockhandler.h @@ -0,0 +1,130 @@ +#pragma once + +#include <library/cpp/logger/all.h> + +#include <util/generic/buffer.h> +#include <util/generic/map.h> +#include <util/generic/vector.h> +#include <util/network/address.h> +#include <util/network/ip.h> +#include <util/network/socket.h> +#include <util/system/mutex.h> +#include <util/system/yassert.h> + +#include <cerrno> +#include <util/generic/noncopyable.h> + +class TAddrList: public TVector<NAddr::IRemoteAddrRef> { +private: + using TBase = TVector<NAddr::IRemoteAddrRef>; + +public: + //msvc doesn't support base class constructor inheritance + TAddrList() = default; + + template <typename T> + TAddrList(T&& arg) + : TBase(std::forward<T>(arg)) + { + } + + template <typename T1, typename T2> + TAddrList(T1&& arg1, T2&& arg2) + : TBase(std::forward<T1>(arg1), std::forward<T2>(arg2)) + { + } + + TAddrList(std::initializer_list<NAddr::IRemoteAddrRef> list) + : TBase(list) + { + } + + static TAddrList MakeV4Addr(ui32 ip, TIpPort port) { + return TAddrList({new NAddr::TIPv4Addr(TIpAddress(htonl(ip), htons(port)))}); + } + + std::pair<ui32, TIpPort> GetV4Addr() const { + for (const auto& addrRef : *this) { + const sockaddr* sa = addrRef->Addr(); + if (sa->sa_family == AF_INET) { + const sockaddr_in* sin = reinterpret_cast<const sockaddr_in*>(sa); + return std::make_pair(ntohl(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + } + } + return std::make_pair(0, 0); + } +}; + +class TSimpleSocketHandler { +public: + TSimpleSocketHandler() = default; + + int Good() const { + return static_cast<bool>(Socket); + } + + int Connect(const TAddrList& addrs, TDuration 
timeout) { + try { + for (const auto& item : addrs) { + const sockaddr* sa = item->Addr(); + TSocketHolder s(socket(sa->sa_family, SOCK_STREAM, 0)); + if (s.Closed()) { + continue; + } + +#ifndef WIN32 + if (fcntl(s, F_SETFD, FD_CLOEXEC)) // no inherit on fork()/exec() + return errno ? errno : EBADF; +#endif + if (connect(s, sa, item->Len())) { + s.Close(); + continue; + } + + Socket.Reset(new TSocket(s.Release())); + Socket->SetSocketTimeout(timeout.Seconds(), timeout.MilliSecondsOfSecond()); + Socket->SetZeroLinger(); + Socket->SetKeepAlive(true); + return 0; + } + } catch (...) { + return EBADF; + } + return errno ? errno : EBADF; + } + + void Disconnect() { + if (!Socket) + return; + Socket->ShutDown(SHUT_RDWR); + Socket.Destroy(); + } + + void SetSocket(SOCKET fd) { + Socket.Reset(new TSocket(fd)); + } + + void shutdown() { + Socket->ShutDown(SHUT_WR); + } + + int send(const void* message, size_t messlen) { + return ((ssize_t)messlen == Socket->Send(message, messlen)); + } + + int peek() { + char buf[1]; + return (1 == recv(*Socket, buf, 1, MSG_PEEK)); + } + + ssize_t read(void* buffer, size_t buflen) { + return Socket->Recv(buffer, buflen); + } + + THolder<TSocket> PickOutSocket() { + return std::move(Socket); + } + +protected: + THolder<TSocket> Socket; +}; diff --git a/library/cpp/http/fetch/ut/ya.make b/library/cpp/http/fetch/ut/ya.make new file mode 100644 index 0000000000..7486986b36 --- /dev/null +++ b/library/cpp/http/fetch/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/http/fetch) + +OWNER( + g:zora +) + +SRCS( + httpfsm_ut.cpp + httpparser_ut.cpp +) + +END() diff --git a/library/cpp/http/fetch/ya.make b/library/cpp/http/fetch/ya.make new file mode 100644 index 0000000000..7737127463 --- /dev/null +++ b/library/cpp/http/fetch/ya.make @@ -0,0 +1,38 @@ +LIBRARY() + +OWNER( + g:zora +) + +PEERDIR( + contrib/libs/zlib + library/cpp/charset + library/cpp/digest/md5 + library/cpp/http/misc + library/cpp/logger + library/cpp/mime/types + 
library/cpp/uri +) + +SRCS( + http_digest.cpp + http_socket.cpp + httpheader.cpp + httpload.cpp + exthttpcodes.cpp + httpfsm.rl6 + httpagent.h + httpfetcher.h + httpheader.h + httpparser.h + httpzreader.h + sockhandler.h +) + +GENERATE_ENUM_SERIALIZATION(httpheader.h) + +SET(RAGEL6_FLAGS -CF1) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/library/cpp/http/io/chunk.cpp b/library/cpp/http/io/chunk.cpp new file mode 100644 index 0000000000..6975d9eac1 --- /dev/null +++ b/library/cpp/http/io/chunk.cpp @@ -0,0 +1,246 @@ +#include "chunk.h" + +#include "headers.h" + +#include <util/string/cast.h> +#include <util/generic/utility.h> +#include <util/generic/yexception.h> + +static inline size_t ParseHex(const TString& s) { + if (s.empty()) { + ythrow yexception() << "can not parse chunk length(empty string)"; + } + + size_t ret = 0; + + for (TString::const_iterator c = s.begin(); c != s.end(); ++c) { + const char ch = *c; + + if (ch >= '0' && ch <= '9') { + ret *= 16; + ret += ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + ret *= 16; + ret += 10 + ch - 'a'; + } else if (ch >= 'A' && ch <= 'F') { + ret *= 16; + ret += 10 + ch - 'A'; + } else if (ch == ';') { + break; + } else if (isspace(ch)) { + continue; + } else { + ythrow yexception() << "can not parse chunk length(" << s.data() << ")"; + } + } + + return ret; +} + +static inline char* ToHex(size_t len, char* buf) { + do { + const size_t val = len % 16; + + *--buf = (val < 10) ? 
(val + '0') : (val - 10 + 'a'); + len /= 16; + } while (len); + + return buf; +} + +class TChunkedInput::TImpl { +public: + inline TImpl(IInputStream* slave, TMaybe<THttpHeaders>* trailers) + : Slave_(slave) + , Trailers_(trailers) + , Pending_(0) + , LastChunkReaded_(false) + { + if (Trailers_) { + Trailers_->Clear(); + } + } + + inline ~TImpl() { + } + + inline size_t Read(void* buf, size_t len) { + return Perform(len, [this, buf](size_t toRead) { return Slave_->Read(buf, toRead); }); + } + + inline size_t Skip(size_t len) { + return Perform(len, [this](size_t toSkip) { return Slave_->Skip(toSkip); }); + } + +private: + template <class Operation> + inline size_t Perform(size_t len, const Operation& operation) { + if (!HavePendingData()) { + return 0; + } + + const size_t toProcess = Min(Pending_, len); + + if (toProcess) { + const size_t processed = operation(toProcess); + + if (!processed) { + ythrow yexception() << "malformed http chunk"; + } + + Pending_ -= processed; + + return processed; + } + + return 0; + } + + inline bool HavePendingData() { + if (LastChunkReaded_) { + return false; + } + + if (!Pending_) { + if (!ProceedToNextChunk()) { + return false; + } + } + + return true; + } + + inline bool ProceedToNextChunk() { + TString len(Slave_->ReadLine()); + + if (len.empty()) { + /* + * skip crlf from previous chunk + */ + + len = Slave_->ReadLine(); + } + + Pending_ = ParseHex(len); + + if (Pending_) { + return true; + } + + if (Trailers_) { + Trailers_->ConstructInPlace(Slave_); + } + LastChunkReaded_ = true; + + return false; + } + +private: + IInputStream* Slave_; + TMaybe<THttpHeaders>* Trailers_; + size_t Pending_; + bool LastChunkReaded_; +}; + +TChunkedInput::TChunkedInput(IInputStream* slave, TMaybe<THttpHeaders>* trailers) + : Impl_(new TImpl(slave, trailers)) +{ +} + +TChunkedInput::~TChunkedInput() { +} + +size_t TChunkedInput::DoRead(void* buf, size_t len) { + return Impl_->Read(buf, len); +} + +size_t TChunkedInput::DoSkip(size_t len) { + 
return Impl_->Skip(len); +} + +class TChunkedOutput::TImpl { + typedef IOutputStream::TPart TPart; + +public: + inline TImpl(IOutputStream* slave) + : Slave_(slave) + { + } + + inline ~TImpl() { + } + + inline void Write(const void* buf, size_t len) { + const char* ptr = (const char*)buf; + + while (len) { + const size_t portion = Min<size_t>(len, 1024 * 16); + + WriteImpl(ptr, portion); + + ptr += portion; + len -= portion; + } + } + + inline void WriteImpl(const void* buf, size_t len) { + char tmp[32]; + char* e = tmp + sizeof(tmp); + char* b = ToHex(len, e); + + const TPart parts[] = { + TPart(b, e - b), + TPart::CrLf(), + TPart(buf, len), + TPart::CrLf(), + }; + + Slave_->Write(parts, sizeof(parts) / sizeof(*parts)); + } + + inline void Flush() { + Slave_->Flush(); + } + + inline void Finish() { + Slave_->Write("0\r\n\r\n", 5); + + Flush(); + } + +private: + IOutputStream* Slave_; +}; + +TChunkedOutput::TChunkedOutput(IOutputStream* slave) + : Impl_(new TImpl(slave)) +{ +} + +TChunkedOutput::~TChunkedOutput() { + try { + Finish(); + } catch (...) { + } +} + +void TChunkedOutput::DoWrite(const void* buf, size_t len) { + if (Impl_.Get()) { + Impl_->Write(buf, len); + } else { + ythrow yexception() << "can not write to finished stream"; + } +} + +void TChunkedOutput::DoFlush() { + if (Impl_.Get()) { + Impl_->Flush(); + } +} + +void TChunkedOutput::DoFinish() { + if (Impl_.Get()) { + Impl_->Finish(); + Impl_.Destroy(); + } +} diff --git a/library/cpp/http/io/chunk.h b/library/cpp/http/io/chunk.h new file mode 100644 index 0000000000..88d89fafda --- /dev/null +++ b/library/cpp/http/io/chunk.h @@ -0,0 +1,47 @@ +#pragma once + +#include <util/stream/output.h> +#include <util/generic/maybe.h> +#include <util/generic/ptr.h> + +class THttpHeaders; + +/// @addtogroup Streams_Chunked +/// @{ +/// Chunked data input. +/// @details Reads data blocks sequentially. The data is assumed to be +/// written in the form <block length><block data>. 
+class TChunkedInput: public IInputStream { +public: + /// Если передан указатель на trailers, то туда будут записаны HTTP trailer'ы (возможно пустые), + /// которые идут после чанков. + TChunkedInput(IInputStream* slave, TMaybe<THttpHeaders>* trailers = nullptr); + ~TChunkedInput() override; + +private: + size_t DoRead(void* buf, size_t len) override; + size_t DoSkip(size_t len) override; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; + +/// Вывод данных порциями. +/// @details Вывод данных блоками в виде <длина блока><блок данных>. Если объем +/// данных превышает 64K, они записываются в виде n блоков по 64K + то, что осталось. +class TChunkedOutput: public IOutputStream { +public: + TChunkedOutput(IOutputStream* slave); + ~TChunkedOutput() override; + +private: + void DoWrite(const void* buf, size_t len) override; + void DoFlush() override; + void DoFinish() override; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; +/// @} diff --git a/library/cpp/http/io/chunk_ut.cpp b/library/cpp/http/io/chunk_ut.cpp new file mode 100644 index 0000000000..da283f8568 --- /dev/null +++ b/library/cpp/http/io/chunk_ut.cpp @@ -0,0 +1,105 @@ +#include "chunk.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/file.h> +#include <util/system/tempfile.h> +#include <util/stream/null.h> + +#define CDATA "./chunkedio" + +Y_UNIT_TEST_SUITE(TestChunkedIO) { + static const char test_data[] = "87s6cfbsudg cuisg s igasidftasiy tfrcua6s"; + + TString CombString(const TString& s, size_t chunkSize) { + TString result; + for (size_t pos = 0; pos < s.size(); pos += 2 * chunkSize) + result += s.substr(pos, chunkSize); + return result; + } + + void WriteTestData(IOutputStream * stream, TString * contents) { + contents->clear(); + for (size_t i = 0; i < sizeof(test_data); ++i) { + stream->Write(test_data, i); + contents->append(test_data, i); + } + } + + void ReadInSmallChunks(IInputStream * stream, TString * contents) { + char buf[11]; + size_t read = 0; + 
+ contents->clear(); + do { + read = stream->Read(buf, sizeof(buf)); + contents->append(buf, read); + } while (read > 0); + } + + void ReadCombed(IInputStream * stream, TString * contents, size_t chunkSize) { + Y_ASSERT(chunkSize < 128); + char buf[128]; + + contents->clear(); + while (true) { + size_t read = stream->Load(buf, chunkSize); + contents->append(buf, read); + if (read == 0) + break; + + size_t toSkip = chunkSize; + size_t skipped = 0; + do { + skipped = stream->Skip(toSkip); + toSkip -= skipped; + } while (skipped != 0 && toSkip != 0); + } + } + + Y_UNIT_TEST(TestChunkedIo) { + TTempFile tmpFile(CDATA); + TString tmp; + + { + TUnbufferedFileOutput fo(CDATA); + TChunkedOutput co(&fo); + WriteTestData(&co, &tmp); + } + + { + TUnbufferedFileInput fi(CDATA); + TChunkedInput ci(&fi); + TString r; + + ReadInSmallChunks(&ci, &r); + + UNIT_ASSERT_EQUAL(r, tmp); + } + + { + TUnbufferedFileInput fi(CDATA); + TChunkedInput ci(&fi); + TString r; + + ReadCombed(&ci, &r, 11); + + UNIT_ASSERT_EQUAL(r, CombString(tmp, 11)); + } + } + + Y_UNIT_TEST(TestBadChunk) { + bool hasError = false; + + try { + TString badChunk = "10\r\nqwerty"; + TMemoryInput mi(badChunk.data(), badChunk.size()); + TChunkedInput ci(&mi); + TransferData(&ci, &Cnull); + } catch (...) 
{ + hasError = true; + } + + UNIT_ASSERT(hasError); + } +} diff --git a/library/cpp/http/io/compression.cpp b/library/cpp/http/io/compression.cpp new file mode 100644 index 0000000000..8fa1f62ae6 --- /dev/null +++ b/library/cpp/http/io/compression.cpp @@ -0,0 +1,66 @@ +#include "compression.h" + +#if defined(ENABLE_GPL) +#include <library/cpp/streams/lz/lz.h> +#endif + +#include <library/cpp/streams/brotli/brotli.h> +#include <library/cpp/streams/lzma/lzma.h> +#include <library/cpp/streams/bzip2/bzip2.h> + +#include <library/cpp/blockcodecs/stream.h> +#include <library/cpp/blockcodecs/codecs.h> + +#include <util/stream/zlib.h> + + +TCompressionCodecFactory::TCompressionCodecFactory() { + auto gzip = [](auto s) { + return MakeHolder<TZLibDecompress>(s); + }; + + Add("gzip", gzip, [](auto s) { return MakeHolder<TZLibCompress>(s, ZLib::GZip); }); + Add("deflate", gzip, [](auto s) { return MakeHolder<TZLibCompress>(s, ZLib::ZLib); }); + Add("br", [](auto s) { return MakeHolder<TBrotliDecompress>(s); }, [](auto s) { return MakeHolder<TBrotliCompress>(s, 4); }); + Add("x-gzip", gzip, [](auto s) { return MakeHolder<TZLibCompress>(s, ZLib::GZip); }); + Add("x-deflate", gzip, [](auto s) { return MakeHolder<TZLibCompress>(s, ZLib::ZLib); }); + +#if defined(ENABLE_GPL) + const ui16 bs = 32 * 1024; + + Add("y-lzo", [](auto s) { return MakeHolder<TLzoDecompress>(s); }, [bs](auto s) { return MakeHolder<TLazy<TLzoCompress> >(s, bs); }); + Add("y-lzf", [](auto s) { return MakeHolder<TLzfDecompress>(s); }, [bs](auto s) { return MakeHolder<TLazy<TLzfCompress> >(s, bs); }); + Add("y-lzq", [](auto s) { return MakeHolder<TLzqDecompress>(s); }, [bs](auto s) { return MakeHolder<TLazy<TLzqCompress> >(s, bs); }); +#endif + + Add("y-bzip2", [](auto s) { return MakeHolder<TBZipDecompress>(s); }, [](auto s) { return MakeHolder<TBZipCompress>(s); }); + Add("y-lzma", [](auto s) { return MakeHolder<TLzmaDecompress>(s); }, [](auto s) { return MakeHolder<TLzmaCompress>(s); }); + + for (auto 
codecName : NBlockCodecs::ListAllCodecs()) { + if (codecName.StartsWith("zstd06")) { + continue; + } + + if (codecName.StartsWith("zstd08")) { + continue; + } + + auto codec = NBlockCodecs::Codec(codecName); + + auto enc = [codec](auto s) { + return MakeHolder<NBlockCodecs::TCodedOutput>(s, codec, 32 * 1024); + }; + + auto dec = [codec](auto s) { + return MakeHolder<NBlockCodecs::TDecodedInput>(s, codec); + }; + + Add(TString("z-") + codecName, dec, enc); + } +} + +void TCompressionCodecFactory::Add(TStringBuf name, TDecoderConstructor d, TEncoderConstructor e) { + Strings_.emplace_back(name); + Codecs_[Strings_.back()] = TCodec{d, e}; + BestCodecs_.emplace_back(Strings_.back()); +} diff --git a/library/cpp/http/io/compression.h b/library/cpp/http/io/compression.h new file mode 100644 index 0000000000..f16c4a18eb --- /dev/null +++ b/library/cpp/http/io/compression.h @@ -0,0 +1,72 @@ +#pragma once + +#include "stream.h" + +#include <util/generic/deque.h> +#include <util/generic/hash.h> + +class TCompressionCodecFactory { +public: + using TDecoderConstructor = std::function<THolder<IInputStream>(IInputStream*)>; + using TEncoderConstructor = std::function<THolder<IOutputStream>(IOutputStream*)>; + + TCompressionCodecFactory(); + + static inline TCompressionCodecFactory& Instance() noexcept { + return *SingletonWithPriority<TCompressionCodecFactory, 0>(); + } + + inline const TDecoderConstructor* FindDecoder(TStringBuf name) const { + if (auto codec = Codecs_.FindPtr(name)) { + return &codec->Decoder; + } + + return nullptr; + } + + inline const TEncoderConstructor* FindEncoder(TStringBuf name) const { + if (auto codec = Codecs_.FindPtr(name)) { + return &codec->Encoder; + } + + return nullptr; + } + + inline TArrayRef<const TStringBuf> GetBestCodecs() const { + return BestCodecs_; + } + +private: + void Add(TStringBuf name, TDecoderConstructor d, TEncoderConstructor e); + + struct TCodec { + TDecoderConstructor Decoder; + TEncoderConstructor Encoder; + }; + + 
TDeque<TString> Strings_; + THashMap<TStringBuf, TCodec> Codecs_; + TVector<TStringBuf> BestCodecs_; +}; + +namespace NHttp { + template <typename F> + TString ChooseBestCompressionScheme(F accepted, TArrayRef<const TStringBuf> available) { + if (available.empty()) { + return "identity"; + } + + if (accepted("*")) { + return TString(available[0]); + } + + for (const auto& coding : available) { + TString s(coding); + if (accepted(s)) { + return s; + } + } + + return "identity"; + } +} diff --git a/library/cpp/http/io/compression_ut.cpp b/library/cpp/http/io/compression_ut.cpp new file mode 100644 index 0000000000..2f3d131f8c --- /dev/null +++ b/library/cpp/http/io/compression_ut.cpp @@ -0,0 +1,60 @@ +#include "stream.h" +#include "compression.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/stream/zlib.h> +#include <util/generic/hash_set.h> + +Y_UNIT_TEST_SUITE(THttpCompressionTest) { + static const TString DATA = "I'm a teapot"; + + Y_UNIT_TEST(TestGetBestCodecs) { + UNIT_ASSERT(TCompressionCodecFactory::Instance().GetBestCodecs().size() > 0); + } + + Y_UNIT_TEST(TestEncoder) { + TStringStream buffer; + + { + auto encoder = TCompressionCodecFactory::Instance().FindEncoder("gzip"); + UNIT_ASSERT(encoder); + + auto encodedStream = (*encoder)(&buffer); + encodedStream->Write(DATA); + } + + TZLibDecompress decompressor(&buffer); + UNIT_ASSERT_EQUAL(decompressor.ReadAll(), DATA); + } + + Y_UNIT_TEST(TestDecoder) { + TStringStream buffer; + + { + TZLibCompress compressor(TZLibCompress::TParams(&buffer).SetType(ZLib::GZip)); + compressor.Write(DATA); + } + + auto decoder = TCompressionCodecFactory::Instance().FindDecoder("gzip"); + UNIT_ASSERT(decoder); + + auto decodedStream = (*decoder)(&buffer); + UNIT_ASSERT_EQUAL(decodedStream->ReadAll(), DATA); + } + + Y_UNIT_TEST(TestChooseBestCompressionScheme) { + THashSet<TString> accepted; + + auto checkAccepted = [&accepted](const TString& v) { + 
return accepted.contains(v); + }; + + UNIT_ASSERT_VALUES_EQUAL("identity", NHttp::ChooseBestCompressionScheme(checkAccepted, {"gzip", "deflate"})); + accepted.insert("deflate"); + UNIT_ASSERT_VALUES_EQUAL("deflate", NHttp::ChooseBestCompressionScheme(checkAccepted, {"gzip", "deflate"})); + accepted.insert("*"); + UNIT_ASSERT_VALUES_EQUAL("gzip", NHttp::ChooseBestCompressionScheme(checkAccepted, {"gzip", "deflate"})); + } +} // THttpCompressionTest suite diff --git a/library/cpp/http/io/fuzz/main.cpp b/library/cpp/http/io/fuzz/main.cpp new file mode 100644 index 0000000000..8ded9c7e32 --- /dev/null +++ b/library/cpp/http/io/fuzz/main.cpp @@ -0,0 +1,15 @@ +#include <library/cpp/http/io/stream.h> + +#include <util/generic/vector.h> +#include <util/stream/mem.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { + TMemoryInput mi(data, size); + + try { + THttpInput(&mi).ReadAll(); + } catch (...) { + } + + return 0; // Non-zero return values are reserved for future use. 
+} diff --git a/library/cpp/http/io/fuzz/ya.make b/library/cpp/http/io/fuzz/ya.make new file mode 100644 index 0000000000..8b3ccb1969 --- /dev/null +++ b/library/cpp/http/io/fuzz/ya.make @@ -0,0 +1,18 @@ +FUZZ() + +OWNER( + pg + g:util +) + +PEERDIR( + library/cpp/http/io +) + +SIZE(MEDIUM) + +SRCS( + main.cpp +) + +END() diff --git a/library/cpp/http/io/headers.cpp b/library/cpp/http/io/headers.cpp new file mode 100644 index 0000000000..4ec27a29e8 --- /dev/null +++ b/library/cpp/http/io/headers.cpp @@ -0,0 +1,108 @@ +#include "headers.h" +#include "stream.h" + +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> +#include <util/stream/output.h> +#include <util/string/ascii.h> +#include <util/string/cast.h> +#include <util/string/strip.h> + +static inline TStringBuf Trim(const char* b, const char* e) noexcept { + return StripString(TStringBuf(b, e)); +} + +THttpInputHeader::THttpInputHeader(const TStringBuf header) { + size_t pos = header.find(':'); + + if (pos == TString::npos) { + ythrow THttpParseException() << "can not parse http header(" << TString{header}.Quote() << ")"; + } + + Name_ = TString(header.cbegin(), header.cbegin() + pos); + Value_ = ::ToString(Trim(header.cbegin() + pos + 1, header.cend())); +} + +THttpInputHeader::THttpInputHeader(TString name, TString value) + : Name_(std::move(name)) + , Value_(std::move(value)) +{ +} + +void THttpInputHeader::OutTo(IOutputStream* stream) const { + typedef IOutputStream::TPart TPart; + + const TPart parts[] = { + TPart(Name_), + TPart(": ", 2), + TPart(Value_), + TPart::CrLf(), + }; + + stream->Write(parts, sizeof(parts) / sizeof(*parts)); +} + +THttpHeaders::THttpHeaders(IInputStream* stream) { + TString header; + TString line; + + bool rdOk = stream->ReadLine(header); + while (rdOk && !header.empty()) { + rdOk = stream->ReadLine(line); + + if (rdOk && ((line[0] == ' ') || (line[0] == '\t'))) { + header += line; + } else { + AddHeader(THttpInputHeader(header)); + header = line; + } + } +} + 
+bool THttpHeaders::HasHeader(const TStringBuf header) const { + return FindHeader(header); +} + +const THttpInputHeader* THttpHeaders::FindHeader(const TStringBuf header) const { + for (const auto& hdr : Headers_) { + if (AsciiCompareIgnoreCase(hdr.Name(), header) == 0) { + return &hdr; + } + } + return nullptr; +} + +void THttpHeaders::RemoveHeader(const TStringBuf header) { + for (auto h = Headers_.begin(); h != Headers_.end(); ++h) { + if (AsciiCompareIgnoreCase(h->Name(), header) == 0) { + Headers_.erase(h); + return; + } + } +} + +void THttpHeaders::AddOrReplaceHeader(const THttpInputHeader& header) { + for (auto& hdr : Headers_) { + if (AsciiCompareIgnoreCase(hdr.Name(), header.Name()) == 0) { + hdr = header; + return; + } + } + + AddHeader(header); +} + +void THttpHeaders::AddHeader(THttpInputHeader header) { + Headers_.push_back(std::move(header)); +} + +void THttpHeaders::OutTo(IOutputStream* stream) const { + for (TConstIterator header = Begin(); header != End(); ++header) { + header->OutTo(stream); + } +} + +template <> +void Out<THttpHeaders>(IOutputStream& out, const THttpHeaders& h) { + h.OutTo(&out); +} diff --git a/library/cpp/http/io/headers.h b/library/cpp/http/io/headers.h new file mode 100644 index 0000000000..a71793d1c6 --- /dev/null +++ b/library/cpp/http/io/headers.h @@ -0,0 +1,125 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/strbuf.h> +#include <util/generic/deque.h> +#include <util/generic/vector.h> +#include <util/string/cast.h> + +class IInputStream; +class IOutputStream; + +/// @addtogroup Streams_HTTP +/// @{ +/// Объект, содержащий информацию о HTTP-заголовке. +class THttpInputHeader { +public: + /// @param[in] header - строка вида 'параметр: значение'. + THttpInputHeader(TStringBuf header); + /// @param[in] name - имя параметра. + /// @param[in] value - значение параметра. + THttpInputHeader(TString name, TString value); + + /// Возвращает имя параметра. 
+ inline const TString& Name() const noexcept { + return Name_; + } + + /// Возвращает значение параметра. + inline const TString& Value() const noexcept { + return Value_; + } + + /// Записывает заголовок вида "имя параметра: значение\r\n" в поток. + void OutTo(IOutputStream* stream) const; + + /// Возвращает строку "имя параметра: значение". + inline TString ToString() const { + return Name_ + TStringBuf(": ") + Value_; + } + +private: + TString Name_; + TString Value_; +}; + +/// Контейнер для хранения HTTP-заголовков +class THttpHeaders { + using THeaders = TDeque<THttpInputHeader>; + +public: + using TConstIterator = THeaders::const_iterator; + + THttpHeaders() = default; + + /// Добавляет каждую строку из потока в контейнер, считая ее правильным заголовком. + THttpHeaders(IInputStream* stream); + + /// Стандартный итератор. + inline TConstIterator Begin() const noexcept { + return Headers_.begin(); + } + inline TConstIterator begin() const noexcept { + return Headers_.begin(); + } + + /// Стандартный итератор. + inline TConstIterator End() const noexcept { + return Headers_.end(); + } + inline TConstIterator end() const noexcept { + return Headers_.end(); + } + + /// Возвращает количество заголовков в контейнере. + inline size_t Count() const noexcept { + return Headers_.size(); + } + + /// Проверяет, содержит ли контейнер хотя бы один заголовок. + inline bool Empty() const noexcept { + return Headers_.empty(); + } + + /// Добавляет заголовок в контейнер. + void AddHeader(THttpInputHeader header); + + template <typename ValueType> + void AddHeader(TString name, const ValueType& value) { + AddHeader(THttpInputHeader(std::move(name), ToString(value))); + } + + /// Добавляет заголовок в контейнер, если тот не содержит заголовка + /// c таким же параметром. В противном случае, заменяет существующий + /// заголовок на новый. 
+ void AddOrReplaceHeader(const THttpInputHeader& header); + + template <typename ValueType> + void AddOrReplaceHeader(TString name, const ValueType& value) { + AddOrReplaceHeader(THttpInputHeader(std::move(name), ToString(value))); + } + + // Проверяет, есть ли такой заголовок + bool HasHeader(TStringBuf header) const; + + /// Удаляет заголовок, если он есть. + void RemoveHeader(TStringBuf header); + + /// Ищет заголовок по указанному имени + /// Возвращает nullptr, если не нашел + const THttpInputHeader* FindHeader(TStringBuf header) const; + + /// Записывает все заголовки контейнера в поток. + /// @details Каждый заголовк записывается в виде "имя параметра: значение\r\n". + void OutTo(IOutputStream* stream) const; + + /// Обменивает наборы заголовков двух контейнеров. + void Swap(THttpHeaders& headers) noexcept { + Headers_.swap(headers.Headers_); + } + +private: + THeaders Headers_; +}; + +/// @} diff --git a/library/cpp/http/io/headers_ut.cpp b/library/cpp/http/io/headers_ut.cpp new file mode 100644 index 0000000000..1d23ef8fdc --- /dev/null +++ b/library/cpp/http/io/headers_ut.cpp @@ -0,0 +1,176 @@ +#include <util/generic/set.h> +#include <util/generic/string.h> +#include <util/generic/strbuf.h> +#include <utility> + +#include <library/cpp/http/io/headers.h> +#include <library/cpp/testing/unittest/registar.h> + +namespace { + class THeadersExistence { + public: + THeadersExistence() = default; + + THeadersExistence(const THttpHeaders& headers) { + for (THttpHeaders::TConstIterator it = headers.Begin(); + it != headers.End(); + ++it) { + Add(it->Name(), it->Value()); + } + } + + public: + void Add(TStringBuf name, TStringBuf value) { + Impl.emplace(TString(name), TString(value)); + } + + bool operator==(const THeadersExistence& rhs) const { + return Impl == rhs.Impl; + } + + private: + typedef TMultiSet<std::pair<TString, TString>> TImpl; + TImpl Impl; + }; +} + +bool operator==(const THeadersExistence& lhs, const THttpHeaders& rhs) { + return lhs == 
THeadersExistence(rhs); +} + +bool operator==(const THttpHeaders& lhs, const THeadersExistence& rhs) { + return THeadersExistence(lhs) == rhs; +} + +class THttpHeadersTest: public TTestBase { + UNIT_TEST_SUITE(THttpHeadersTest); + UNIT_TEST(TestAddOperation1Arg); + UNIT_TEST(TestAddOperation2Args); + UNIT_TEST(TestAddOrReplaceOperation1Arg); + UNIT_TEST(TestAddOrReplaceOperation2Args); + UNIT_TEST(TestAddHeaderTemplateness); + UNIT_TEST(TestFindHeader); + UNIT_TEST_SUITE_END(); + +private: + typedef void (*TAddHeaderFunction)(THttpHeaders&, TStringBuf name, TStringBuf value); + typedef void (*TAddOrReplaceHeaderFunction)(THttpHeaders&, TStringBuf name, TStringBuf value); + +public: + void TestAddOperation1Arg(); + void TestAddOperation2Args(); + void TestAddOrReplaceOperation1Arg(); + void TestAddOrReplaceOperation2Args(); + void TestAddHeaderTemplateness(); + void TestFindHeader(); + +private: + static void AddHeaderImpl1Arg(THttpHeaders& headers, TStringBuf name, TStringBuf value) { + headers.AddHeader(THttpInputHeader(TString(name), TString(value))); + } + + static void AddHeaderImpl2Args(THttpHeaders& headers, TStringBuf name, TStringBuf value) { + headers.AddHeader(TString(name), TString(value)); + } + + static void AddOrReplaceHeaderImpl1Arg(THttpHeaders& headers, TStringBuf name, TStringBuf value) { + headers.AddOrReplaceHeader(THttpInputHeader(TString(name), TString(value))); + } + + static void AddOrReplaceHeaderImpl2Args(THttpHeaders& headers, TStringBuf name, TStringBuf value) { + headers.AddOrReplaceHeader(TString(name), TString(value)); + } + + void DoTestAddOperation(TAddHeaderFunction); + void DoTestAddOrReplaceOperation(TAddHeaderFunction, TAddOrReplaceHeaderFunction); +}; + +UNIT_TEST_SUITE_REGISTRATION(THttpHeadersTest); + +void THttpHeadersTest::TestAddOperation1Arg() { + DoTestAddOperation(AddHeaderImpl1Arg); +} +void THttpHeadersTest::TestAddOperation2Args() { + DoTestAddOperation(AddHeaderImpl2Args); +} + +void 
THttpHeadersTest::TestAddOrReplaceOperation1Arg() { + DoTestAddOrReplaceOperation(AddHeaderImpl1Arg, AddOrReplaceHeaderImpl1Arg); +} +void THttpHeadersTest::TestAddOrReplaceOperation2Args() { + DoTestAddOrReplaceOperation(AddHeaderImpl2Args, AddOrReplaceHeaderImpl2Args); +} + +void THttpHeadersTest::DoTestAddOperation(TAddHeaderFunction addHeader) { + THttpHeaders h1; + + addHeader(h1, "h1", "v1"); + addHeader(h1, "h2", "v1"); + + addHeader(h1, "h3", "v1"); + addHeader(h1, "h3", "v2"); + addHeader(h1, "h3", "v2"); + + THeadersExistence h2; + + h2.Add("h1", "v1"); + h2.Add("h2", "v1"); + + h2.Add("h3", "v1"); + h2.Add("h3", "v2"); + h2.Add("h3", "v2"); + + UNIT_ASSERT(h2 == h1); +} + +// Sorry, but AddOrReplaceHeader replaces only first occurence +void THttpHeadersTest::DoTestAddOrReplaceOperation(TAddHeaderFunction addHeader, TAddOrReplaceHeaderFunction addOrReplaceHeader) { + THttpHeaders h1; + + addHeader(h1, "h1", "v1"); + + addOrReplaceHeader(h1, "h2", "v1"); + addOrReplaceHeader(h1, "h2", "v2"); + addOrReplaceHeader(h1, "h2", "v3"); + addHeader(h1, "h2", "v4"); + + addHeader(h1, "h3", "v1"); + addHeader(h1, "h3", "v2"); + addOrReplaceHeader(h1, "h3", "v3"); + + THeadersExistence h2; + + h2.Add("h1", "v1"); + + h2.Add("h2", "v3"); + h2.Add("h2", "v4"); + + h2.Add("h3", "v2"); + h2.Add("h3", "v3"); + + UNIT_ASSERT(h2 == h1); +} + +void THttpHeadersTest::TestAddHeaderTemplateness() { + THttpHeaders h1; + h1.AddHeader("h1", "v1"); + h1.AddHeader("h2", TString("v2")); + h1.AddHeader("h3", TStringBuf("v3")); + h1.AddHeader("h4", TStringBuf("v4")); + + THeadersExistence h2; + h2.Add("h1", "v1"); + h2.Add("h2", "v2"); + h2.Add("h3", "v3"); + h2.Add("h4", "v4"); + + UNIT_ASSERT(h1 == h2); +} + +void THttpHeadersTest::TestFindHeader() { + THttpHeaders sut; + sut.AddHeader("NaMe", "Value"); + + UNIT_ASSERT(sut.FindHeader("name")); + UNIT_ASSERT(sut.FindHeader("name")->Value() == "Value"); +} diff --git a/library/cpp/http/io/list_codings/main.cpp 
b/library/cpp/http/io/list_codings/main.cpp new file mode 100644 index 0000000000..9818d02bdf --- /dev/null +++ b/library/cpp/http/io/list_codings/main.cpp @@ -0,0 +1,8 @@ +#include <library/cpp/http/io/stream.h> +#include <util/stream/output.h> + +int main() { + for (auto codec : SupportedCodings()) { + Cout << codec << Endl; + } +} diff --git a/library/cpp/http/io/list_codings/ya.make b/library/cpp/http/io/list_codings/ya.make new file mode 100644 index 0000000000..e5c5fed6dc --- /dev/null +++ b/library/cpp/http/io/list_codings/ya.make @@ -0,0 +1,13 @@ +PROGRAM() + +OWNER(pg) + +PEERDIR( + library/cpp/http/io +) + +SRCS( + main.cpp +) + +END() diff --git a/library/cpp/http/io/stream.cpp b/library/cpp/http/io/stream.cpp new file mode 100644 index 0000000000..6689be684f --- /dev/null +++ b/library/cpp/http/io/stream.cpp @@ -0,0 +1,1005 @@ +#include "stream.h" + +#include "compression.h" +#include "chunk.h" + +#include <util/stream/buffered.h> +#include <util/stream/length.h> +#include <util/stream/multi.h> +#include <util/stream/null.h> +#include <util/stream/tee.h> + +#include <util/system/compat.h> +#include <util/system/yassert.h> + +#include <util/network/socket.h> + +#include <util/string/cast.h> +#include <util/string/strip.h> + +#include <util/generic/string.h> +#include <util/generic/utility.h> +#include <util/generic/hash_set.h> +#include <util/generic/yexception.h> + +#define HEADERCMP(header, str) \ + case sizeof(str) - 1: \ + if (!stricmp((header).Name().data(), str)) + +namespace { + inline size_t SuggestBufferSize() { + return 8192; + } + + inline TStringBuf Trim(const char* b, const char* e) noexcept { + return StripString(TStringBuf(b, e)); + } + + inline TStringBuf RmSemiColon(const TStringBuf& s) { + return s.Before(';'); + } + + template <class T, size_t N> + class TStreams: private TNonCopyable { + struct TDelete { + inline void operator()(T* t) noexcept { + delete t; + } + }; + + typedef T* TPtr; + + public: + inline TStreams() noexcept + : 
Beg_(T_ + N) + { + } + + inline ~TStreams() { + TDelete f; + + ForEach(f); + } + + template <class S> + inline S* Add(S* t) noexcept { + return (S*)AddImpl((T*)t); + } + + template <class Functor> + inline void ForEach(Functor& f) { + const TPtr* end = T_ + N; + + for (TPtr* cur = Beg_; cur != end; ++cur) { + f(*cur); + } + } + + TPtr Top() { + const TPtr* end = T_ + N; + return end == Beg_ ? nullptr : *Beg_; + } + + private: + inline T* AddImpl(T* t) noexcept { + Y_ASSERT(Beg_ > T_); + + return (*--Beg_ = t); + } + + private: + TPtr T_[N]; + TPtr* Beg_; + }; + + template <class TStream> + class TLazy: public IOutputStream { + public: + TLazy(IOutputStream* out, ui16 bs) + : Output_(out) + , BlockSize_(bs) + { + } + + void DoWrite(const void* buf, size_t len) override { + ConstructSlave(); + Slave_->Write(buf, len); + } + + void DoFlush() override { + ConstructSlave(); + Slave_->Flush(); + } + + void DoFinish() override { + ConstructSlave(); + Slave_->Finish(); + } + + private: + inline void ConstructSlave() { + if (!Slave_) { + Slave_.Reset(new TStream(Output_, BlockSize_)); + } + } + + private: + IOutputStream* Output_; + ui16 BlockSize_; + THolder<IOutputStream> Slave_; + }; +} + +class THttpInput::TImpl { + typedef THashSet<TString> TAcceptCodings; + +public: + inline TImpl(IInputStream* slave) + : Slave_(slave) + , Buffered_(Slave_, SuggestBufferSize()) + , ChunkedInput_(nullptr) + , Input_(nullptr) + , FirstLine_(ReadFirstLine(Buffered_)) + , Headers_(&Buffered_) + , KeepAlive_(false) + , HasContentLength_(false) + , ContentLength_(0) + , ContentEncoded_(false) + , Expect100Continue_(false) + { + BuildInputChain(); + Y_ASSERT(Input_); + } + + static TString ReadFirstLine(TBufferedInput& in) { + TString s; + Y_ENSURE_EX(in.ReadLine(s), THttpReadException() << "Failed to get first line"); + return s; + } + + inline ~TImpl() { + } + + inline size_t Read(void* buf, size_t len) { + return Perform(len, [this, buf](size_t toRead) { return Input_->Read(buf, toRead); 
}); + } + + inline size_t Skip(size_t len) { + return Perform(len, [this](size_t toSkip) { return Input_->Skip(toSkip); }); + } + + inline const TString& FirstLine() const noexcept { + return FirstLine_; + } + + inline const THttpHeaders& Headers() const noexcept { + return Headers_; + } + + inline const TMaybe<THttpHeaders>& Trailers() const noexcept { + return Trailers_; + } + + inline bool IsKeepAlive() const noexcept { + return KeepAlive_; + } + + inline bool AcceptEncoding(const TString& s) const { + return Codings_.find(to_lower(s)) != Codings_.end(); + } + + inline bool GetContentLength(ui64& value) const noexcept { + if (HasContentLength_) { + value = ContentLength_; + return true; + } + return false; + } + + inline bool ContentEncoded() const noexcept { + return ContentEncoded_; + } + + inline bool HasContent() const noexcept { + return HasContentLength_ || ChunkedInput_; + } + + inline bool HasExpect100Continue() const noexcept { + return Expect100Continue_; + } + +private: + template <class Operation> + inline size_t Perform(size_t len, const Operation& operation) { + size_t processed = operation(len); + if (processed == 0 && len > 0) { + if (!ChunkedInput_) { + Trailers_.ConstructInPlace(); + } else { + // Read the header of the trailing chunk. It remains in + // the TChunkedInput stream if the HTTP response is compressed. 
+ char symbol; + if (ChunkedInput_->Read(&symbol, 1) != 0) { + ythrow THttpParseException() << "some data remaining in TChunkedInput"; + } + } + } + return processed; + } + + struct TParsedHeaders { + bool Chunked = false; + bool KeepAlive = false; + TStringBuf LZipped; + }; + + struct TTrEnc { + inline void operator()(const TStringBuf& s) { + if (s == TStringBuf("chunked")) { + p->Chunked = true; + } + } + + TParsedHeaders* p; + }; + + struct TAccCoding { + inline void operator()(const TStringBuf& s) { + c->insert(ToString(s)); + } + + TAcceptCodings* c; + }; + + template <class Functor> + inline void ForEach(TString in, Functor& f) { + in.to_lower(); + + const char* b = in.begin(); + const char* c = b; + const char* e = in.end(); + + while (c != e) { + if (*c == ',') { + f(RmSemiColon(Trim(b, c))); + b = c + 1; + } + + ++c; + } + + if (b != c) { + f(RmSemiColon(Trim(b, c))); + } + } + + inline bool IsRequest() const { + return strnicmp(FirstLine().data(), "get", 3) == 0 || + strnicmp(FirstLine().data(), "post", 4) == 0 || + strnicmp(FirstLine().data(), "put", 3) == 0 || + strnicmp(FirstLine().data(), "patch", 5) == 0 || + strnicmp(FirstLine().data(), "head", 4) == 0 || + strnicmp(FirstLine().data(), "delete", 6) == 0; + } + + inline void BuildInputChain() { + TParsedHeaders p; + + size_t pos = FirstLine_.rfind(' '); + // In HTTP/1.1 Keep-Alive is turned on by default + if (pos != TString::npos && strcmp(FirstLine_.c_str() + pos + 1, "HTTP/1.1") == 0) { + p.KeepAlive = true; //request + } else if (strnicmp(FirstLine_.data(), "HTTP/1.1", 8) == 0) { + p.KeepAlive = true; //reply + } + + for (THttpHeaders::TConstIterator h = Headers_.Begin(); h != Headers_.End(); ++h) { + const THttpInputHeader& header = *h; + switch (header.Name().size()) { + HEADERCMP(header, "transfer-encoding") { + TTrEnc f = {&p}; + ForEach(header.Value(), f); + } + break; + HEADERCMP(header, "content-encoding") { + p.LZipped = header.Value(); + } + break; + HEADERCMP(header, "accept-encoding") 
{ + TAccCoding f = {&Codings_}; + ForEach(header.Value(), f); + } + break; + HEADERCMP(header, "content-length") { + HasContentLength_ = true; + ContentLength_ = FromString(header.Value()); + } + break; + HEADERCMP(header, "connection") { + // accept header "Connection: Keep-Alive, TE" + if (strnicmp(header.Value().data(), "keep-alive", 10) == 0) { + p.KeepAlive = true; + } else if (stricmp(header.Value().data(), "close") == 0) { + p.KeepAlive = false; + } + } + [[fallthrough]]; + HEADERCMP(header, "expect") { + auto findContinue = [&](const TStringBuf& s) { + if (strnicmp(s.data(), "100-continue", 13) == 0) { + Expect100Continue_ = true; + } + }; + ForEach(header.Value(), findContinue); + } + break; + } + } + + if (p.Chunked) { + ChunkedInput_ = Streams_.Add(new TChunkedInput(&Buffered_, &Trailers_)); + Input_ = ChunkedInput_; + } else { + // disable buffering + Buffered_.Reset(&Cnull); + Input_ = Streams_.Add(new TMultiInput(&Buffered_, Slave_)); + + if (IsRequest() || HasContentLength_) { + /* + * TODO - we have other cases + */ + Input_ = Streams_.Add(new TLengthLimitedInput(Input_, ContentLength_)); + } + } + + if (auto decoder = TCompressionCodecFactory::Instance().FindDecoder(p.LZipped)) { + ContentEncoded_ = true; + Input_ = Streams_.Add((*decoder)(Input_).Release()); + } + + KeepAlive_ = p.KeepAlive; + } + +private: + IInputStream* Slave_; + + /* + * input helpers + */ + TBufferedInput Buffered_; + TStreams<IInputStream, 8> Streams_; + IInputStream* ChunkedInput_; + + /* + * final input stream + */ + IInputStream* Input_; + + TString FirstLine_; + THttpHeaders Headers_; + TMaybe<THttpHeaders> Trailers_; + bool KeepAlive_; + + TAcceptCodings Codings_; + + bool HasContentLength_; + ui64 ContentLength_; + + bool ContentEncoded_; + bool Expect100Continue_; +}; + +THttpInput::THttpInput(IInputStream* slave) + : Impl_(new TImpl(slave)) +{ +} + +THttpInput::THttpInput(THttpInput&& httpInput) = default; + +THttpInput::~THttpInput() { +} + +size_t 
THttpInput::DoRead(void* buf, size_t len) { + return Impl_->Read(buf, len); +} + +size_t THttpInput::DoSkip(size_t len) { + return Impl_->Skip(len); +} + +const THttpHeaders& THttpInput::Headers() const noexcept { + return Impl_->Headers(); +} + +const TMaybe<THttpHeaders>& THttpInput::Trailers() const noexcept { + return Impl_->Trailers(); +} + +const TString& THttpInput::FirstLine() const noexcept { + return Impl_->FirstLine(); +} + +bool THttpInput::IsKeepAlive() const noexcept { + return Impl_->IsKeepAlive(); +} + +bool THttpInput::AcceptEncoding(const TString& coding) const { + return Impl_->AcceptEncoding(coding); +} + +TString THttpInput::BestCompressionScheme(TArrayRef<const TStringBuf> codings) const { + return NHttp::ChooseBestCompressionScheme( + [this](const TString& coding) { + return AcceptEncoding(coding); + }, + codings + ); +} + +TString THttpInput::BestCompressionScheme() const { + return BestCompressionScheme(TCompressionCodecFactory::Instance().GetBestCodecs()); +} + +bool THttpInput::GetContentLength(ui64& value) const noexcept { + return Impl_->GetContentLength(value); +} + +bool THttpInput::ContentEncoded() const noexcept { + return Impl_->ContentEncoded(); +} + +bool THttpInput::HasContent() const noexcept { + return Impl_->HasContent(); +} + +bool THttpInput::HasExpect100Continue() const noexcept { + return Impl_->HasExpect100Continue(); +} + +class THttpOutput::TImpl { + class TSizeCalculator: public IOutputStream { + public: + inline TSizeCalculator() noexcept { + } + + ~TSizeCalculator() override { + } + + void DoWrite(const void* /*buf*/, size_t len) override { + Length_ += len; + } + + inline size_t Length() const noexcept { + return Length_; + } + + private: + size_t Length_ = 0; + }; + + enum TState { + Begin = 0, + FirstLineSent = 1, + HeadersSent = 2 + }; + + struct TFlush { + inline void operator()(IOutputStream* s) { + s->Flush(); + } + }; + + struct TFinish { + inline void operator()(IOutputStream* s) { + s->Finish(); + } + }; + 
+public: + inline TImpl(IOutputStream* slave, THttpInput* request) + : Slave_(slave) + , State_(Begin) + , Output_(Slave_) + , Request_(request) + , Version_(1100) + , KeepAliveEnabled_(false) + , BodyEncodingEnabled_(true) + , CompressionHeaderEnabled_(true) + , Finished_(false) + { + } + + inline ~TImpl() { + } + + inline void SendContinue() { + Output_->Write("HTTP/1.1 100 Continue\r\n\r\n"); + Output_->Flush(); + } + + inline void Write(const void* buf, size_t len) { + if (Finished_) { + ythrow THttpException() << "can not write to finished stream"; + } + + if (State_ == HeadersSent) { + Output_->Write(buf, len); + + return; + } + + const char* b = (const char*)buf; + const char* e = b + len; + const char* c = b; + + while (c != e) { + if (*c == '\n') { + Line_.append(b, c); + + if (!Line_.empty() && Line_.back() == '\r') { + Line_.pop_back(); + } + + b = c + 1; + + Process(Line_); + + if (State_ == HeadersSent) { + Output_->Write(b, e - b); + + return; + } + + Line_.clear(); + } + + ++c; + } + + if (b != c) { + Line_.append(b, c); + } + } + + inline void Flush() { + TFlush f; + Streams_.ForEach(f); + Slave_->Flush(); // see SEARCH-1030 + } + + inline void Finish() { + if (Finished_) { + return; + } + + TFinish f; + Streams_.ForEach(f); + Slave_->Finish(); // see SEARCH-1030 + + Finished_ = true; + } + + inline const THttpHeaders& SentHeaders() const noexcept { + return Headers_; + } + + inline void EnableCompression(TArrayRef<const TStringBuf> schemas) { + ComprSchemas_ = schemas; + } + + inline void EnableKeepAlive(bool enable) { + KeepAliveEnabled_ = enable; + } + + inline void EnableBodyEncoding(bool enable) { + BodyEncodingEnabled_ = enable; + } + + inline void EnableCompressionHeader(bool enable) { + CompressionHeaderEnabled_ = enable; + } + + inline bool IsCompressionEnabled() const noexcept { + return !ComprSchemas_.empty(); + } + + inline bool IsKeepAliveEnabled() const noexcept { + return KeepAliveEnabled_; + } + + inline bool IsBodyEncodingEnabled() 
const noexcept { + return BodyEncodingEnabled_; + } + + inline bool IsCompressionHeaderEnabled() const noexcept { + return CompressionHeaderEnabled_; + } + + inline bool CanBeKeepAlive() const noexcept { + return SupportChunkedTransfer() && IsKeepAliveEnabled() && (Request_ ? Request_->IsKeepAlive() : true); + } + + inline const TString& FirstLine() const noexcept { + return FirstLine_; + } + + inline size_t SentSize() const noexcept { + return SizeCalculator_.Length(); + } + +private: + static inline bool IsResponse(const TString& s) noexcept { + return strnicmp(s.data(), "HTTP/", 5) == 0; + } + + static inline bool IsRequest(const TString& s) noexcept { + return !IsResponse(s); + } + + inline bool IsHttpRequest() const noexcept { + return IsRequest(FirstLine_); + } + + inline bool HasResponseBody() const noexcept { + if (IsHttpResponse()) { + if (Request_ && Request_->FirstLine().StartsWith(TStringBuf("HEAD"))) + return false; + if (FirstLine_.size() > 9 && strncmp(FirstLine_.data() + 9, "204", 3) == 0) + return false; + return true; + } + return false; + } + + inline bool IsHttpResponse() const noexcept { + return IsResponse(FirstLine_); + } + + inline bool HasRequestBody() const noexcept { + return strnicmp(FirstLine_.data(), "POST", 4) == 0 || + strnicmp(FirstLine_.data(), "PATCH", 5) == 0 || + strnicmp(FirstLine_.data(), "PUT", 3) == 0; + } + static inline size_t ParseHttpVersion(const TString& s) { + if (s.empty()) { + ythrow THttpParseException() << "malformed http stream"; + } + + size_t parsed_version = 0; + + if (IsResponse(s)) { + const char* b = s.data() + 5; + + while (*b && *b != ' ') { + if (*b != '.') { + parsed_version *= 10; + parsed_version += (*b - '0'); + } + + ++b; + } + } else { + /* + * s not empty here + */ + const char* e = s.end() - 1; + const char* b = s.begin(); + size_t mult = 1; + + while (e != b && *e != '/') { + if (*e != '.') { + parsed_version += (*e - '0') * mult; + mult *= 10; + } + + --e; + } + } + + return parsed_version * 
100; + } + + inline void ParseHttpVersion() { + size_t parsed_version = ParseHttpVersion(FirstLine_); + + if (Request_) { + parsed_version = Min(parsed_version, ParseHttpVersion(Request_->FirstLine())); + } + + Version_ = parsed_version; + } + + inline void Process(const TString& s) { + Y_ASSERT(State_ != HeadersSent); + + if (State_ == Begin) { + FirstLine_ = s; + ParseHttpVersion(); + State_ = FirstLineSent; + } else { + if (s.empty()) { + BuildOutputStream(); + WriteCached(); + State_ = HeadersSent; + } else { + AddHeader(THttpInputHeader(s)); + } + } + } + + inline void WriteCachedImpl(IOutputStream* s) const { + s->Write(FirstLine_.data(), FirstLine_.size()); + s->Write("\r\n", 2); + Headers_.OutTo(s); + s->Write("\r\n", 2); + s->Finish(); + } + + inline void WriteCached() { + size_t buflen = 0; + + { + TSizeCalculator out; + + WriteCachedImpl(&out); + buflen = out.Length(); + } + + { + TBufferedOutput out(Slave_, buflen); + + WriteCachedImpl(&out); + } + + if (IsHttpRequest() && !HasRequestBody()) { + /* + * if this is http request, then send it now + */ + + Slave_->Flush(); + } + } + + inline bool SupportChunkedTransfer() const noexcept { + return Version_ >= 1100; + } + + inline void BuildOutputStream() { + if (CanBeKeepAlive()) { + AddOrReplaceHeader(THttpInputHeader("Connection", "Keep-Alive")); + } else { + AddOrReplaceHeader(THttpInputHeader("Connection", "Close")); + } + + if (IsHttpResponse()) { + if (Request_ && IsCompressionEnabled() && HasResponseBody()) { + TString scheme = Request_->BestCompressionScheme(ComprSchemas_); + if (scheme != "identity") { + AddOrReplaceHeader(THttpInputHeader("Content-Encoding", scheme)); + RemoveHeader("Content-Length"); + } + } + + RebuildStream(); + } else { + if (IsCompressionEnabled()) { + AddOrReplaceHeader(THttpInputHeader("Accept-Encoding", BuildAcceptEncoding())); + } + if (HasRequestBody()) { + RebuildStream(); + } + } + } + + inline TString BuildAcceptEncoding() const { + TString ret; + + for (const auto& 
coding : ComprSchemas_) { + if (ret) { + ret += ", "; + } + + ret += coding; + } + + return ret; + } + + inline void RebuildStream() { + bool keepAlive = false; + const TCompressionCodecFactory::TEncoderConstructor* encoder = nullptr; + bool chunked = false; + bool haveContentLength = false; + + for (THttpHeaders::TConstIterator h = Headers_.Begin(); h != Headers_.End(); ++h) { + const THttpInputHeader& header = *h; + const TString hl = to_lower(header.Name()); + + if (hl == TStringBuf("connection")) { + keepAlive = to_lower(header.Value()) == TStringBuf("keep-alive"); + } else if (IsCompressionHeaderEnabled() && hl == TStringBuf("content-encoding")) { + encoder = TCompressionCodecFactory::Instance().FindEncoder(to_lower(header.Value())); + } else if (hl == TStringBuf("transfer-encoding")) { + chunked = to_lower(header.Value()) == TStringBuf("chunked"); + } else if (hl == TStringBuf("content-length")) { + haveContentLength = true; + } + } + + if (!haveContentLength && !chunked && (IsHttpRequest() || HasResponseBody()) && SupportChunkedTransfer() && (keepAlive || encoder || IsHttpRequest())) { + AddHeader(THttpInputHeader("Transfer-Encoding", "chunked")); + chunked = true; + } + + if (IsBodyEncodingEnabled() && chunked) { + Output_ = Streams_.Add(new TChunkedOutput(Output_)); + } + + Output_ = Streams_.Add(new TTeeOutput(Output_, &SizeCalculator_)); + + if (IsBodyEncodingEnabled() && encoder) { + Output_ = Streams_.Add((*encoder)(Output_).Release()); + } + } + + inline void AddHeader(const THttpInputHeader& hdr) { + Headers_.AddHeader(hdr); + } + + inline void AddOrReplaceHeader(const THttpInputHeader& hdr) { + Headers_.AddOrReplaceHeader(hdr); + } + + inline void RemoveHeader(const TString& hdr) { + Headers_.RemoveHeader(hdr); + } + +private: + IOutputStream* Slave_; + TState State_; + IOutputStream* Output_; + TStreams<IOutputStream, 8> Streams_; + TString Line_; + TString FirstLine_; + THttpHeaders Headers_; + THttpInput* Request_; + size_t Version_; + + 
TArrayRef<const TStringBuf> ComprSchemas_; + + bool KeepAliveEnabled_; + bool BodyEncodingEnabled_; + bool CompressionHeaderEnabled_; + + bool Finished_; + + TSizeCalculator SizeCalculator_; +}; + +THttpOutput::THttpOutput(IOutputStream* slave) + : Impl_(new TImpl(slave, nullptr)) +{ +} + +THttpOutput::THttpOutput(IOutputStream* slave, THttpInput* request) + : Impl_(new TImpl(slave, request)) +{ +} + +THttpOutput::~THttpOutput() { + try { + Finish(); + } catch (...) { + } +} + +void THttpOutput::DoWrite(const void* buf, size_t len) { + Impl_->Write(buf, len); +} + +void THttpOutput::DoFlush() { + Impl_->Flush(); +} + +void THttpOutput::DoFinish() { + Impl_->Finish(); +} + +const THttpHeaders& THttpOutput::SentHeaders() const noexcept { + return Impl_->SentHeaders(); +} + +void THttpOutput::EnableCompression(bool enable) { + if (enable) { + EnableCompression(TCompressionCodecFactory::Instance().GetBestCodecs()); + } else { + TArrayRef<TStringBuf> codings; + EnableCompression(codings); + } +} + +void THttpOutput::EnableCompression(TArrayRef<const TStringBuf> schemas) { + Impl_->EnableCompression(schemas); +} + +void THttpOutput::EnableKeepAlive(bool enable) { + Impl_->EnableKeepAlive(enable); +} + +void THttpOutput::EnableBodyEncoding(bool enable) { + Impl_->EnableBodyEncoding(enable); +} + +void THttpOutput::EnableCompressionHeader(bool enable) { + Impl_->EnableCompressionHeader(enable); +} + +bool THttpOutput::IsKeepAliveEnabled() const noexcept { + return Impl_->IsKeepAliveEnabled(); +} + +bool THttpOutput::IsBodyEncodingEnabled() const noexcept { + return Impl_->IsBodyEncodingEnabled(); +} + +bool THttpOutput::IsCompressionEnabled() const noexcept { + return Impl_->IsCompressionEnabled(); +} + +bool THttpOutput::IsCompressionHeaderEnabled() const noexcept { + return Impl_->IsCompressionHeaderEnabled(); +} + +bool THttpOutput::CanBeKeepAlive() const noexcept { + return Impl_->CanBeKeepAlive(); +} + +void THttpOutput::SendContinue() { + Impl_->SendContinue(); +} + 
+const TString& THttpOutput::FirstLine() const noexcept { + return Impl_->FirstLine(); +} + +size_t THttpOutput::SentSize() const noexcept { + return Impl_->SentSize(); +} + +unsigned ParseHttpRetCode(const TStringBuf& ret) { + const TStringBuf code = StripString(StripString(ret.After(' ')).Before(' ')); + + return FromString<unsigned>(code.data(), code.size()); +} + +void SendMinimalHttpRequest(TSocket& s, const TStringBuf& host, const TStringBuf& request, const TStringBuf& agent, const TStringBuf& from) { + TSocketOutput so(s); + THttpOutput output(&so); + + output.EnableKeepAlive(false); + output.EnableCompression(false); + + const IOutputStream::TPart parts[] = { + IOutputStream::TPart(TStringBuf("GET ")), + IOutputStream::TPart(request), + IOutputStream::TPart(TStringBuf(" HTTP/1.1")), + IOutputStream::TPart::CrLf(), + IOutputStream::TPart(TStringBuf("Host: ")), + IOutputStream::TPart(host), + IOutputStream::TPart::CrLf(), + IOutputStream::TPart(TStringBuf("User-Agent: ")), + IOutputStream::TPart(agent), + IOutputStream::TPart::CrLf(), + IOutputStream::TPart(TStringBuf("From: ")), + IOutputStream::TPart(from), + IOutputStream::TPart::CrLf(), + IOutputStream::TPart::CrLf(), + }; + + output.Write(parts, sizeof(parts) / sizeof(*parts)); + output.Finish(); +} + +TArrayRef<const TStringBuf> SupportedCodings() { + return TCompressionCodecFactory::Instance().GetBestCodecs(); +} diff --git a/library/cpp/http/io/stream.h b/library/cpp/http/io/stream.h new file mode 100644 index 0000000000..78ca4fc814 --- /dev/null +++ b/library/cpp/http/io/stream.h @@ -0,0 +1,178 @@ +#pragma once + +#include "headers.h" + +#include <util/stream/output.h> +#include <util/generic/maybe.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> +#include <util/generic/array_ref.h> + +class TSocket; + +struct THttpException: public yexception { +}; + +struct THttpParseException: public THttpException { +}; + 
+struct THttpReadException: public THttpException { +}; + +/// Reads the response of an HTTP server. +class THttpInput: public IInputStream { +public: + THttpInput(IInputStream* slave); + THttpInput(THttpInput&& httpInput); + ~THttpInput() override; + + /* + * parsed http headers + */ + /// Returns the container with the headers of the HTTP server response. + const THttpHeaders& Headers() const noexcept; + + /* + * parsed http trailers + */ + /// Returns the (possibly empty) container with the trailers of the HTTP server response. + /// The stream must be read completely before the trailers become available. + /// Until the stream has been read to the end, Nothing is returned. + /// https://tools.ietf.org/html/rfc7230#section-4.1.2 + const TMaybe<THttpHeaders>& Trailers() const noexcept; + + /* + * first line - response or request + */ + /// Returns the first line of the HTTP server response. + /// @details The first line sent by an HTTP server is the status line, + /// which contains three fields: the HTTP version, the status code and the description. + const TString& FirstLine() const noexcept; + + /* + * connection can be keep-alive + */ + /// Checks whether the connection with the server is not finished. + /// @details The transaction is considered finished if the + /// "Connection: Keep Alive" header was not sent. + /// NOTE(review): TestKeepAlive in stream_ut.cpp also expects HTTP/1.1 messages without a Connection header to report keep-alive. + bool IsKeepAlive() const noexcept; + + /* + * output data can be encoded + */ + /// Checks whether the given content coding of the HTTP server response + /// is supported. + bool AcceptEncoding(const TString& coding) const; + + /// Tries to determine the best content coding for the HTTP server response. + /// @details If the server response says that any content coding + /// is supported, gzip is chosen. Otherwise the best coding + /// supported by the server is chosen from the list of codings. 
+ TString BestCompressionScheme() const; + TString BestCompressionScheme(TArrayRef<const TStringBuf> codings) const; + + /// If the headers contain Content-Length, returns true and + /// stores the value of the header in value + bool GetContentLength(ui64& value) const noexcept; + + /// Flag telling that the data is compressed: if it is set, Content-Length (when present in the headers) + /// gives the size of the compressed data, while the data read from THttpInput is already decompressed. + bool ContentEncoded() const noexcept; + + /// Returns true if Content-Length or Transfer-Encoding header received + bool HasContent() const noexcept; + + bool HasExpect100Continue() const noexcept; + +private: + size_t DoRead(void* buf, size_t len) override; + size_t DoSkip(size_t len) override; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; + +/// Sends a request to an HTTP server. +class THttpOutput: public IOutputStream { +public: + THttpOutput(IOutputStream* slave); + THttpOutput(IOutputStream* slave, THttpInput* request); + ~THttpOutput() override; + + /* + * sent http headers + */ + /// Returns the container with the headers of the request to the HTTP server. + const THttpHeaders& SentHeaders() const noexcept; + + /// Sets the mode in which the server returns the response in compressed form. + void EnableCompression(bool enable); + void EnableCompression(TArrayRef<const TStringBuf> schemas); + + /// Sets the mode in which the connection with the server is not closed + /// after the transaction ends. + void EnableKeepAlive(bool enable); + + /// Sets the mode in which the body of the HTTP request/response is transformed according to + /// the Content-Encoding and Transfer-Encoding headers (enabled by default) + void EnableBodyEncoding(bool enable); + + /// Sets the mode in which the body of the HTTP response is compressed with the codec + /// specified in Content-Encoding (enabled by default) + void EnableCompressionHeader(bool enable); + + /// Checks whether responses are returned in compressed form. 
+ bool IsCompressionEnabled() const noexcept; + + /// Checks whether the connection with the server is kept open after the transaction ends. + bool IsKeepAliveEnabled() const noexcept; + + /// Checks whether the body of the HTTP request/response is transformed according to + /// the Content-Encoding and Transfer-Encoding headers + bool IsBodyEncodingEnabled() const noexcept; + + /// Checks whether the body of the HTTP response is compressed with the codec + /// specified in Content-Encoding + bool IsCompressionHeaderEnabled() const noexcept; + + /* + * is this connection can be really keep-alive + */ + /// Checks whether it is possible to enable the mode in which the connection with the server + /// is not closed after the transaction ends. + bool CanBeKeepAlive() const noexcept; + + void SendContinue(); + + /* + * first line - response or request + */ + /// Returns the first line of the HTTP request/response + const TString& FirstLine() const noexcept; + + /// Returns the size of the sent data (without headers, after compression, not + /// counting the chunked transfer encoding) + size_t SentSize() const noexcept; + +private: + void DoWrite(const void* buf, size_t len) override; + void DoFlush() override; + void DoFinish() override; + +private: + class TImpl; + THolder<TImpl> Impl_; +}; + +/// Returns the status code from the server response. +unsigned ParseHttpRetCode(const TStringBuf& ret); + +/// Sends a request with the minimum of required headers to an HTTP server. 
+void SendMinimalHttpRequest(TSocket& s, const TStringBuf& host, const TStringBuf& request, const TStringBuf& agent = "YandexSomething/1.0", const TStringBuf& from = "webadmin@yandex.ru"); + +TArrayRef<const TStringBuf> SupportedCodings(); + +/// @} diff --git a/library/cpp/http/io/stream_ut.cpp b/library/cpp/http/io/stream_ut.cpp new file mode 100644 index 0000000000..1ea35df675 --- /dev/null +++ b/library/cpp/http/io/stream_ut.cpp @@ -0,0 +1,732 @@ +#include "stream.h" +#include "chunk.h" + +#include <library/cpp/http/server/http_ex.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/string/printf.h> +#include <util/network/socket.h> +#include <util/stream/file.h> +#include <util/stream/output.h> +#include <util/stream/tee.h> +#include <util/stream/zlib.h> +#include <util/stream/null.h> + +Y_UNIT_TEST_SUITE(THttpStreamTest) { + class TTestHttpServer: public THttpServer::ICallBack { + class TRequest: public THttpClientRequestEx { + public: + inline TRequest(TTestHttpServer* parent) + : Parent_(parent) + { + } + + bool Reply(void* /*tsr*/) override { + if (!ProcessHeaders()) { + return true; + } + + // Check that function will not hang on + Input().ReadAll(); + + // "lo" is for "local" + if (RD.ServerName() == "yandex.lo") { + // do redirect + Output() << "HTTP/1.1 301 Moved permanently\r\n" + "Location: http://www.yandex.lo\r\n" + "\r\n"; + } else if (RD.ServerName() == "www.yandex.lo") { + Output() << "HTTP/1.1 200 Ok\r\n" + "\r\n"; + } else { + Output() << "HTTP/1.1 200 Ok\r\n\r\n"; + if (Buf.Size()) { + Output().Write(Buf.AsCharPtr(), Buf.Size()); + } else { + Output() << Parent_->Res_; + } + } + Output().Finish(); + + Parent_->LastRequestSentSize_ = Output().SentSize(); + + return true; + } + + private: + TTestHttpServer* Parent_ = nullptr; + }; + + public: + inline TTestHttpServer(const TString& res) + : Res_(res) + { + } + + TClientRequest* CreateClient() override { + return new 
TRequest(this); + } + + size_t LastRequestSentSize() const { + return LastRequestSentSize_; + } + + private: + TString Res_; + size_t LastRequestSentSize_ = 0; + }; + + Y_UNIT_TEST(TestCodings1) { + UNIT_ASSERT(SupportedCodings().size() > 0); + } + + Y_UNIT_TEST(TestHttpInput) { + TString res = "I'm a teapot"; + TPortManager pm; + const ui16 port = pm.GetPort(); + + TTestHttpServer serverImpl(res); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).EnableCompression(true)); + + UNIT_ASSERT(server.Start()); + + TNetworkAddress addr("localhost", port); + TSocket s(addr); + + //TDebugOutput dbg; + TNullOutput dbg; + + { + TSocketOutput so(s); + TTeeOutput out(&so, &dbg); + THttpOutput output(&out); + + output.EnableKeepAlive(true); + output.EnableCompression(true); + + TString r; + r += "GET / HTTP/1.1"; + r += "\r\n"; + r += "Host: yandex.lo"; + r += "\r\n"; + r += "\r\n"; + + output.Write(r.data(), r.size()); + output.Finish(); + } + + { + TSocketInput si(s); + THttpInput input(&si); + unsigned httpCode = ParseHttpRetCode(input.FirstLine()); + UNIT_ASSERT_VALUES_EQUAL(httpCode / 10, 30u); + + TransferData(&input, &dbg); + } + server.Stop(); + } + + Y_UNIT_TEST(TestHttpInputDelete) { + TString res = "I'm a teapot"; + TPortManager pm; + const ui16 port = pm.GetPort(); + + TTestHttpServer serverImpl(res); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).EnableCompression(true)); + + UNIT_ASSERT(server.Start()); + + TNetworkAddress addr("localhost", port); + TSocket s(addr); + + //TDebugOutput dbg; + TNullOutput dbg; + + { + TSocketOutput so(s); + TTeeOutput out(&so, &dbg); + THttpOutput output(&out); + + output.EnableKeepAlive(true); + output.EnableCompression(true); + + TString r; + r += "DELETE / HTTP/1.1"; + r += "\r\n"; + r += "Host: yandex.lo"; + r += "\r\n"; + r += "\r\n"; + + output.Write(r.data(), r.size()); + output.Finish(); + } + + { + TSocketInput si(s); + THttpInput input(&si); + unsigned 
httpCode = ParseHttpRetCode(input.FirstLine()); + UNIT_ASSERT_VALUES_EQUAL(httpCode / 10, 30u); + + TransferData(&input, &dbg); + } + server.Stop(); + } + + Y_UNIT_TEST(TestParseHttpRetCode) { + UNIT_ASSERT_VALUES_EQUAL(ParseHttpRetCode("HTTP/1.1 301"), 301u); + } + + Y_UNIT_TEST(TestKeepAlive) { + { + TString s = "GET / HTTP/1.0\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(!in.IsKeepAlive()); + } + + { + TString s = "GET / HTTP/1.0\r\nConnection: keep-alive\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(in.IsKeepAlive()); + } + + { + TString s = "GET / HTTP/1.1\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(in.IsKeepAlive()); + } + + { + TString s = "GET / HTTP/1.1\r\nConnection: close\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(!in.IsKeepAlive()); + } + + { + TString s = "HTTP/1.0 200 Ok\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(!in.IsKeepAlive()); + } + + { + TString s = "HTTP/1.0 200 Ok\r\nConnection: keep-alive\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(in.IsKeepAlive()); + } + + { + TString s = "HTTP/1.1 200 Ok\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(in.IsKeepAlive()); + } + + { + TString s = "HTTP/1.1 200 Ok\r\nConnection: close\r\n\r\n"; + TStringInput si(s); + THttpInput in(&si); + UNIT_ASSERT(!in.IsKeepAlive()); + } + } + + Y_UNIT_TEST(TestMinRequest) { + TString res = "qqqqqq"; + TPortManager pm; + const ui16 port = pm.GetPort(); + + TTestHttpServer serverImpl(res); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).EnableCompression(true)); + + UNIT_ASSERT(server.Start()); + + TNetworkAddress addr("localhost", port); + + TSocket s(addr); + TNullOutput dbg; + + SendMinimalHttpRequest(s, "www.yandex.lo", "/"); + + TSocketInput si(s); + THttpInput input(&si); + unsigned httpCode = ParseHttpRetCode(input.FirstLine()); + UNIT_ASSERT_VALUES_EQUAL(httpCode, 
200u); + + TransferData(&input, &dbg); + server.Stop(); + } + + Y_UNIT_TEST(TestResponseWithBlanks) { + TString res = "qqqqqq\r\n\r\nsdasdsad\r\n"; + TPortManager pm; + const ui16 port = pm.GetPort(); + + TTestHttpServer serverImpl(res); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).EnableCompression(true)); + + UNIT_ASSERT(server.Start()); + + TNetworkAddress addr("localhost", port); + + TSocket s(addr); + + SendMinimalHttpRequest(s, "www.yandex.ru", "/"); + + TSocketInput si(s); + THttpInput input(&si); + unsigned httpCode = ParseHttpRetCode(input.FirstLine()); + UNIT_ASSERT_VALUES_EQUAL(httpCode, 200u); + TString reply = input.ReadAll(); + UNIT_ASSERT_VALUES_EQUAL(reply, res); + server.Stop(); + } + + Y_UNIT_TEST(TestOutputFlush) { + TString str; + TStringOutput strOut(str); + TBufferedOutput bufOut(&strOut, 8192); + THttpOutput httpOut(&bufOut); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(true); + + const char* header = "GET / HTTP/1.1\r\nHost: yandex.ru\r\n\r\n"; + httpOut << header; + + unsigned curLen = str.size(); + const char* body = "<html>Hello</html>"; + httpOut << body; + UNIT_ASSERT_VALUES_EQUAL(curLen, str.size()); + httpOut.Flush(); + UNIT_ASSERT_VALUES_EQUAL(curLen + strlen(body), str.size()); + } + + Y_UNIT_TEST(TestOutputPostFlush) { + TString str; + TString checkStr; + TStringOutput strOut(str); + TStringOutput checkOut(checkStr); + TBufferedOutput bufOut(&strOut, 8192); + TTeeOutput teeOut(&bufOut, &checkOut); + THttpOutput httpOut(&teeOut); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(true); + + const char* header = "POST / HTTP/1.1\r\nHost: yandex.ru\r\n\r\n"; + httpOut << header; + + UNIT_ASSERT_VALUES_EQUAL(str.size(), 0u); + + const char* body = "<html>Hello</html>"; + httpOut << body; + UNIT_ASSERT_VALUES_EQUAL(str.size(), 0u); + + httpOut.Flush(); + UNIT_ASSERT_VALUES_EQUAL(checkStr.size(), str.size()); + } + + TString MakeHttpOutputBody(const char* body, bool 
encodingEnabled) { + TString str; + TStringOutput strOut(str); + { + TBufferedOutput bufOut(&strOut, 8192); + THttpOutput httpOut(&bufOut); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(true); + httpOut.EnableBodyEncoding(encodingEnabled); + + httpOut << "POST / HTTP/1.1\r\n"; + httpOut << "Host: yandex.ru\r\n"; + httpOut << "Content-Encoding: gzip\r\n"; + httpOut << "\r\n"; + + UNIT_ASSERT_VALUES_EQUAL(str.size(), 0u); + httpOut << body; + } + const char* bodyDelimiter = "\r\n\r\n"; + size_t bodyPos = str.find(bodyDelimiter); + UNIT_ASSERT(bodyPos != TString::npos); + return str.substr(bodyPos + strlen(bodyDelimiter)); + }; + + TString SimulateBodyEncoding(const char* body) { + TString bodyStr; + TStringOutput bodyOut(bodyStr); + TChunkedOutput chunkOut(&bodyOut); + TZLibCompress comprOut(&chunkOut, ZLib::GZip); + comprOut << body; + return bodyStr; + }; + + Y_UNIT_TEST(TestRebuildStreamOnPost) { + const char* body = "<html>Hello</html>"; + UNIT_ASSERT(MakeHttpOutputBody(body, false) == body); + UNIT_ASSERT(MakeHttpOutputBody(body, true) == SimulateBodyEncoding(body)); + } + + Y_UNIT_TEST(TestOutputFinish) { + TString str; + TStringOutput strOut(str); + TBufferedOutput bufOut(&strOut, 8192); + THttpOutput httpOut(&bufOut); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(true); + + const char* header = "GET / HTTP/1.1\r\nHost: yandex.ru\r\n\r\n"; + httpOut << header; + + unsigned curLen = str.size(); + const char* body = "<html>Hello</html>"; + httpOut << body; + UNIT_ASSERT_VALUES_EQUAL(curLen, str.size()); + httpOut.Finish(); + UNIT_ASSERT_VALUES_EQUAL(curLen + strlen(body), str.size()); + } + + Y_UNIT_TEST(TestMultilineHeaders) { + const char* headerLine0 = "HTTP/1.1 200 OK"; + const char* headerLine1 = "Content-Language: en"; + const char* headerLine2 = "Vary: Accept-Encoding, "; + const char* headerLine3 = "\tAccept-Language"; + const char* headerLine4 = "Content-Length: 18"; + + TString endLine("\r\n"); + TString r; + r += 
headerLine0 + endLine; + r += headerLine1 + endLine; + r += headerLine2 + endLine; + r += headerLine3 + endLine; + r += headerLine4 + endLine + endLine; + r += "<html>Hello</html>"; + TStringInput stringInput(r); + THttpInput input(&stringInput); + + const THttpHeaders& httpHeaders = input.Headers(); + UNIT_ASSERT_VALUES_EQUAL(httpHeaders.Count(), 3u); + + THttpHeaders::TConstIterator it = httpHeaders.Begin(); + UNIT_ASSERT_VALUES_EQUAL(it->ToString(), TString(headerLine1)); + UNIT_ASSERT_VALUES_EQUAL((++it)->ToString(), TString::Join(headerLine2, headerLine3)); + UNIT_ASSERT_VALUES_EQUAL((++it)->ToString(), TString(headerLine4)); + } + + Y_UNIT_TEST(ContentLengthRemoval) { + TMemoryInput request("GET / HTTP/1.1\r\nAccept-Encoding: gzip\r\n\r\n"); + THttpInput i(&request); + TString result; + TStringOutput out(result); + THttpOutput httpOut(&out, &i); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(true); + httpOut << "HTTP/1.1 200 OK\r\n"; + char answer[] = "Mary had a little lamb."; + httpOut << "Content-Length: " << strlen(answer) << "\r\n" + "\r\n"; + httpOut << answer; + httpOut.Finish(); + + Cdbg << result; + result.to_lower(); + UNIT_ASSERT(result.Contains("content-encoding: gzip")); + UNIT_ASSERT(!result.Contains("content-length")); + } + + Y_UNIT_TEST(CodecsPriority) { + TMemoryInput request("GET / HTTP/1.1\r\nAccept-Encoding: gzip, br\r\n\r\n"); + TVector<TStringBuf> codecs = {"br", "gzip"}; + + THttpInput i(&request); + TString result; + TStringOutput out(result); + THttpOutput httpOut(&out, &i); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(codecs); + httpOut << "HTTP/1.1 200 OK\r\n"; + char answer[] = "Mary had a little lamb."; + httpOut << "Content-Length: " << strlen(answer) << "\r\n" + "\r\n"; + httpOut << answer; + httpOut.Finish(); + + Cdbg << result; + result.to_lower(); + UNIT_ASSERT(result.Contains("content-encoding: br")); + } + + Y_UNIT_TEST(CodecsPriority2) { + TMemoryInput request("GET / 
HTTP/1.1\r\nAccept-Encoding: gzip, br\r\n\r\n"); + TVector<TStringBuf> codecs = {"gzip", "br"}; + + THttpInput i(&request); + TString result; + TStringOutput out(result); + THttpOutput httpOut(&out, &i); + + httpOut.EnableKeepAlive(true); + httpOut.EnableCompression(codecs); + httpOut << "HTTP/1.1 200 OK\r\n"; + char answer[] = "Mary had a little lamb."; + httpOut << "Content-Length: " << strlen(answer) << "\r\n" + "\r\n"; + httpOut << answer; + httpOut.Finish(); + + Cdbg << result; + result.to_lower(); + UNIT_ASSERT(result.Contains("content-encoding: gzip")); + } + + Y_UNIT_TEST(HasTrailers) { + TMemoryInput response( + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "3\r\n" + "foo" + "0\r\n" + "Bar: baz\r\n" + "\r\n"); + THttpInput i(&response); + TMaybe<THttpHeaders> trailers = i.Trailers(); + UNIT_ASSERT(!trailers.Defined()); + i.ReadAll(); + trailers = i.Trailers(); + UNIT_ASSERT_VALUES_EQUAL(trailers.GetRef().Count(), 1); + UNIT_ASSERT_VALUES_EQUAL(trailers.GetRef().Begin()->ToString(), "Bar: baz"); + } + + Y_UNIT_TEST(NoTrailersWithChunks) { + TMemoryInput response( + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "3\r\n" + "foo" + "0\r\n" + "\r\n"); + THttpInput i(&response); + TMaybe<THttpHeaders> trailers = i.Trailers(); + UNIT_ASSERT(!trailers.Defined()); + i.ReadAll(); + trailers = i.Trailers(); + UNIT_ASSERT_VALUES_EQUAL(trailers.GetRef().Count(), 0); + } + + Y_UNIT_TEST(NoTrailersNoChunks) { + TMemoryInput response( + "HTTP/1.1 200 OK\r\n" + "Content-Length: 3\r\n" + "\r\n" + "bar"); + THttpInput i(&response); + TMaybe<THttpHeaders> trailers = i.Trailers(); + UNIT_ASSERT(!trailers.Defined()); + i.ReadAll(); + trailers = i.Trailers(); + UNIT_ASSERT_VALUES_EQUAL(trailers.GetRef().Count(), 0); + } + + Y_UNIT_TEST(RequestWithoutContentLength) { + TStringStream request; + { + THttpOutput httpOutput(&request); + httpOutput << "POST / HTTP/1.1\r\n" + "Host: yandex.ru\r\n" + "\r\n"; + httpOutput << "GGLOL"; + } + { + 
TStringInput input(request.Str()); + THttpInput httpInput(&input); + bool chunkedOrHasContentLength = false; + for (const auto& header : httpInput.Headers()) { + if (header.Name() == "Transfer-Encoding" && header.Value() == "chunked" || header.Name() == "Content-Length") { + chunkedOrHasContentLength = true; + } + } + + // If request doesn't contain neither Content-Length header nor Transfer-Encoding header + // then server considers message body length to be zero. + // (See https://tools.ietf.org/html/rfc7230#section-3.3.3) + UNIT_ASSERT(chunkedOrHasContentLength); + + UNIT_ASSERT_VALUES_EQUAL(httpInput.ReadAll(), "GGLOL"); + } + } + + Y_UNIT_TEST(TestInputHasContent) { + { + TStringStream request; + request << "POST / HTTP/1.1\r\n" + "Host: yandex.ru\r\n" + "\r\n"; + request << "HTTPDATA"; + + TStringInput input(request.Str()); + THttpInput httpInput(&input); + + UNIT_ASSERT(!httpInput.HasContent()); + UNIT_ASSERT_VALUES_EQUAL(httpInput.ReadAll(), ""); + } + + { + TStringStream request; + request << "POST / HTTP/1.1\r\n" + "Host: yandex.ru\r\n" + "Content-Length: 8" + "\r\n\r\n"; + request << "HTTPDATA"; + + TStringInput input(request.Str()); + THttpInput httpInput(&input); + + UNIT_ASSERT(httpInput.HasContent()); + UNIT_ASSERT_VALUES_EQUAL(httpInput.ReadAll(), "HTTPDATA"); + } + + { + TStringStream request; + request << "POST / HTTP/1.1\r\n" + "Host: yandex.ru\r\n" + "Transfer-Encoding: chunked" + "\r\n\r\n"; + request << "8\r\nHTTPDATA\r\n0\r\n"; + + TStringInput input(request.Str()); + THttpInput httpInput(&input); + + UNIT_ASSERT(httpInput.HasContent()); + UNIT_ASSERT_VALUES_EQUAL(httpInput.ReadAll(), "HTTPDATA"); + } + } + + Y_UNIT_TEST(TestHttpInputHeadRequest) { + class THeadOnlyInput: public IInputStream { + public: + THeadOnlyInput() = default; + + private: + size_t DoRead(void* buf, size_t len) override { + if (Eof_) { + ythrow yexception() << "should not read after EOF"; + } + + const size_t toWrite = Min(len, Data_.size() - Pos_); + if (toWrite == 0) 
{ + Eof_ = true; + return 0; + } + + memcpy(buf, Data_.data() + Pos_, toWrite); + Pos_ += toWrite; + return toWrite; + } + + private: + TString Data_{TStringBuf("HEAD / HTTP/1.1\r\nHost: yandex.ru\r\n\r\n")}; + size_t Pos_{0}; + bool Eof_{false}; + }; + THeadOnlyInput input; + THttpInput httpInput(&input); + + UNIT_ASSERT(!httpInput.HasContent()); + UNIT_ASSERT_VALUES_EQUAL(httpInput.ReadAll(), ""); + } + + Y_UNIT_TEST(TestHttpOutputResponseToHeadRequestNoZeroChunk) { + TStringStream request; + request << "HEAD / HTTP/1.1\r\n" + "Host: yandex.ru\r\n" + "Connection: Keep-Alive\r\n" + "\r\n"; + + TStringInput input(request.Str()); + THttpInput httpInput(&input); + + TStringStream outBuf; + THttpOutput out(&outBuf, &httpInput); + out.EnableKeepAlive(true); + out << "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\n\r\n"; + out << ""; + out.Finish(); + TString result = outBuf.Str(); + UNIT_ASSERT(!result.Contains(TStringBuf("0\r\n"))); + } + + Y_UNIT_TEST(TestHttpOutputDisableCompressionHeader) { + TMemoryInput request("GET / HTTP/1.1\r\nAccept-Encoding: gzip\r\n\r\n"); + const TString data = "qqqqqqqqqqqqqqqqqqqqqqqqqqqqqq"; + + THttpInput httpInput(&request); + TString result; + + { + TStringOutput output(result); + THttpOutput httpOutput(&output, &httpInput); + httpOutput.EnableCompressionHeader(false); + httpOutput << "HTTP/1.1 200 OK\r\n" + "content-encoding: gzip\r\n" + "\r\n" + data; + httpOutput.Finish(); + } + + UNIT_ASSERT(result.Contains("content-encoding: gzip")); + UNIT_ASSERT(result.Contains(data)); + } + + size_t DoTestHttpOutputSize(const TString& res, bool enableCompession) { + TTestHttpServer serverImpl(res); + TPortManager pm; + + const ui16 port = pm.GetPort(); + THttpServer server(&serverImpl, + THttpServer::TOptions(port) + .EnableKeepAlive(true) + .EnableCompression(enableCompession)); + UNIT_ASSERT(server.Start()); + + TNetworkAddress addr("localhost", port); + TSocket s(addr); + + { + TSocketOutput so(s); + THttpOutput out(&so); + out << "GET / 
HTTP/1.1\r\n" + "Host: www.yandex.ru\r\n" + "Connection: Keep-Alive\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + out.Finish(); + } + + TSocketInput si(s); + THttpInput input(&si); + + unsigned httpCode = ParseHttpRetCode(input.FirstLine()); + UNIT_ASSERT_VALUES_EQUAL(httpCode, 200u); + + UNIT_ASSERT_VALUES_EQUAL(res, input.ReadAll()); + + server.Stop(); + + return serverImpl.LastRequestSentSize(); + } + + Y_UNIT_TEST(TestHttpOutputSize) { + TString res = "qqqqqq"; + UNIT_ASSERT_VALUES_EQUAL(res.size(), DoTestHttpOutputSize(res, false)); + UNIT_ASSERT_VALUES_UNEQUAL(res.size(), DoTestHttpOutputSize(res, true)); + } +} // THttpStreamTest suite diff --git a/library/cpp/http/io/stream_ut_medium.cpp b/library/cpp/http/io/stream_ut_medium.cpp new file mode 100644 index 0000000000..2c125eb21e --- /dev/null +++ b/library/cpp/http/io/stream_ut_medium.cpp @@ -0,0 +1,54 @@ +#include "stream.h" +#include <library/cpp/testing/unittest/registar.h> +#include <util/stream/zlib.h> + +Y_UNIT_TEST_SUITE(THttpTestMedium) { + Y_UNIT_TEST(TestCodings2) { + TStringBuf data = "aaaaaaaaaaaaaaaaaaaaaaa"; + + for (auto codec : SupportedCodings()) { + if (codec == TStringBuf("z-zlib-0")) { + continue; + } + + if (codec == TStringBuf("z-null")) { + continue; + } + + TString s; + + { + TStringOutput so(s); + THttpOutput ho(&so); + TBufferedOutput bo(&ho, 10000); + + bo << "HTTP/1.1 200 Ok\r\n" + << "Connection: close\r\n" + << "Content-Encoding: " << codec << "\r\n\r\n"; + + for (size_t i = 0; i < 100; ++i) { + bo << data; + } + } + + try { + UNIT_ASSERT(s.size() > 10); + UNIT_ASSERT(s.find(data) == TString::npos); + } catch (...) 
{ + Cerr << codec << " " << s << Endl; + + throw; + } + + { + TStringInput si(s); + THttpInput hi(&si); + + auto res = hi.ReadAll(); + + UNIT_ASSERT(res.find(data) == 0); + } + } + } + +} // THttpTestMedium suite diff --git a/library/cpp/http/io/ut/medium/ya.make b/library/cpp/http/io/ut/medium/ya.make new file mode 100644 index 0000000000..235a23dcd7 --- /dev/null +++ b/library/cpp/http/io/ut/medium/ya.make @@ -0,0 +1,11 @@ +UNITTEST_FOR(library/cpp/http/io) + +SIZE(MEDIUM) + +OWNER(g:util) + +SRCS( + stream_ut_medium.cpp +) + +END() diff --git a/library/cpp/http/io/ut/ya.make b/library/cpp/http/io/ut/ya.make new file mode 100644 index 0000000000..84f6949db3 --- /dev/null +++ b/library/cpp/http/io/ut/ya.make @@ -0,0 +1,16 @@ +UNITTEST_FOR(library/cpp/http/io) + +OWNER(g:util) + +PEERDIR( + library/cpp/http/server +) + +SRCS( + chunk_ut.cpp + compression_ut.cpp + headers_ut.cpp + stream_ut.cpp +) + +END() diff --git a/library/cpp/http/io/ya.make b/library/cpp/http/io/ya.make new file mode 100644 index 0000000000..dcfbd79885 --- /dev/null +++ b/library/cpp/http/io/ya.make @@ -0,0 +1,22 @@ +LIBRARY() + +OWNER( + g:util + mvel +) + +PEERDIR( + library/cpp/blockcodecs + library/cpp/streams/brotli + library/cpp/streams/bzip2 + library/cpp/streams/lzma +) + +SRCS( + chunk.cpp + compression.cpp + headers.cpp + stream.cpp +) + +END() diff --git a/library/cpp/http/misc/http_headers.h b/library/cpp/http/misc/http_headers.h new file mode 100644 index 0000000000..ff359937fa --- /dev/null +++ b/library/cpp/http/misc/http_headers.h @@ -0,0 +1,72 @@ +#pragma once + +#include <util/generic/strbuf.h> + + +/* Taken from SpringFramework's HttpHeaders. 
Docs: + * https://docs.spring.io/spring-framework/docs/current/javadoc-api/org/springframework/http/HttpHeaders.html + * Source: + * https://github.com/spring-projects/spring-framework/blob/816bbee8de584676250e2bc5dcff6da6cd81623f/spring-web/src/main/java/org/springframework/http/HttpHeaders.java + */ +namespace NHttpHeaders { + constexpr TStringBuf ACCEPT = "Accept"; + constexpr TStringBuf ACCEPT_CHARSET = "Accept-Charset"; + constexpr TStringBuf ACCEPT_ENCODING = "Accept-Encoding"; + constexpr TStringBuf ACCEPT_LANGUAGE = "Accept-Language"; + constexpr TStringBuf ACCEPT_RANGES = "Accept-Ranges"; + constexpr TStringBuf ACCESS_CONTROL_ALLOW_CREDENTIALS = "Access-Control-Allow-Credentials"; + constexpr TStringBuf ACCESS_CONTROL_ALLOW_HEADERS = "Access-Control-Allow-Headers"; + constexpr TStringBuf ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods"; + constexpr TStringBuf ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; + constexpr TStringBuf ACCESS_CONTROL_EXPOSE_HEADERS = "Access-Control-Expose-Headers"; + constexpr TStringBuf ACCESS_CONTROL_MAX_AGE = "Access-Control-Max-Age"; + constexpr TStringBuf ACCESS_CONTROL_REQUEST_HEADERS = "Access-Control-Request-Headers"; + constexpr TStringBuf ACCESS_CONTROL_REQUEST_METHOD = "Access-Control-Request-Method"; + constexpr TStringBuf AGE = "Age"; + constexpr TStringBuf ALLOW = "Allow"; + constexpr TStringBuf AUTHORIZATION = "Authorization"; + constexpr TStringBuf CACHE_CONTROL = "Cache-Control"; + constexpr TStringBuf CONNECTION = "Connection"; + constexpr TStringBuf CONTENT_ENCODING = "Content-Encoding"; + constexpr TStringBuf CONTENT_DISPOSITION = "Content-Disposition"; + constexpr TStringBuf CONTENT_LANGUAGE = "Content-Language"; + constexpr TStringBuf CONTENT_LENGTH = "Content-Length"; + constexpr TStringBuf CONTENT_LOCATION = "Content-Location"; + constexpr TStringBuf CONTENT_RANGE = "Content-Range"; + constexpr TStringBuf CONTENT_TYPE = "Content-Type"; + constexpr TStringBuf COOKIE = "Cookie"; + 
constexpr TStringBuf DATE = "Date"; + constexpr TStringBuf ETAG = "ETag"; + constexpr TStringBuf EXPECT = "Expect"; + constexpr TStringBuf EXPIRES = "Expires"; + constexpr TStringBuf FROM = "From"; + constexpr TStringBuf HOST = "Host"; + constexpr TStringBuf IF_MATCH = "If-Match"; + constexpr TStringBuf IF_MODIFIED_SINCE = "If-Modified-Since"; + constexpr TStringBuf IF_NONE_MATCH = "If-None-Match"; + constexpr TStringBuf IF_RANGE = "If-Range"; + constexpr TStringBuf IF_UNMODIFIED_SINCE = "If-Unmodified-Since"; + constexpr TStringBuf LAST_MODIFIED = "Last-Modified"; + constexpr TStringBuf LINK = "Link"; + constexpr TStringBuf LOCATION = "Location"; + constexpr TStringBuf MAX_FORWARDS = "Max-Forwards"; + constexpr TStringBuf ORIGIN = "Origin"; + constexpr TStringBuf PRAGMA = "Pragma"; + constexpr TStringBuf PROXY_AUTHENTICATE = "Proxy-Authenticate"; + constexpr TStringBuf PROXY_AUTHORIZATION = "Proxy-Authorization"; + constexpr TStringBuf RANGE = "Range"; + constexpr TStringBuf REFERER = "Referer"; + constexpr TStringBuf RETRY_AFTER = "Retry-After"; + constexpr TStringBuf SERVER = "Server"; + constexpr TStringBuf SET_COOKIE = "Set-Cookie"; + constexpr TStringBuf SET_COOKIE2 = "Set-Cookie2"; + constexpr TStringBuf TE = "TE"; + constexpr TStringBuf TRAILER = "Trailer"; + constexpr TStringBuf TRANSFER_ENCODING = "Transfer-Encoding"; + constexpr TStringBuf UPGRADE = "Upgrade"; + constexpr TStringBuf USER_AGENT = "User-Agent"; + constexpr TStringBuf VARY = "Vary"; + constexpr TStringBuf VIA = "Via"; + constexpr TStringBuf WARNING = "Warning"; + constexpr TStringBuf WWW_AUTHENTICATE = "WWW-Authenticate"; +} // namespace NHttpHeaders diff --git a/library/cpp/http/misc/httpcodes.cpp b/library/cpp/http/misc/httpcodes.cpp new file mode 100644 index 0000000000..ad8c80ac1e --- /dev/null +++ b/library/cpp/http/misc/httpcodes.cpp @@ -0,0 +1,141 @@ +#include "httpcodes.h" + +// Maps an HTTP status code to a "<code> <reason phrase>" literal; codes without a case yield "000 Unknown HTTP code". +TStringBuf HttpCodeStrEx(int code) noexcept { + switch (code) { + case HTTP_CONTINUE: + return 
TStringBuf("100 Continue"); + case HTTP_SWITCHING_PROTOCOLS: + return TStringBuf("101 Switching protocols"); + case HTTP_PROCESSING: + return TStringBuf("102 Processing"); + + case HTTP_OK: + return TStringBuf("200 Ok"); + case HTTP_CREATED: + return TStringBuf("201 Created"); + case HTTP_ACCEPTED: + return TStringBuf("202 Accepted"); + case HTTP_NON_AUTHORITATIVE_INFORMATION: + return TStringBuf("203 None authoritative information"); + case HTTP_NO_CONTENT: + return TStringBuf("204 No content"); + case HTTP_RESET_CONTENT: + return TStringBuf("205 Reset content"); + case HTTP_PARTIAL_CONTENT: + return TStringBuf("206 Partial content"); + case HTTP_MULTI_STATUS: + return TStringBuf("207 Multi status"); + case HTTP_ALREADY_REPORTED: + return TStringBuf("208 Already reported"); + case HTTP_IM_USED: + return TStringBuf("226 IM used"); + + case HTTP_MULTIPLE_CHOICES: + return TStringBuf("300 Multiple choices"); + case HTTP_MOVED_PERMANENTLY: + return TStringBuf("301 Moved permanently"); + case HTTP_FOUND: + return TStringBuf("302 Moved temporarily"); + case HTTP_SEE_OTHER: + return TStringBuf("303 See other"); + case HTTP_NOT_MODIFIED: + return TStringBuf("304 Not modified"); + case HTTP_USE_PROXY: + return TStringBuf("305 Use proxy"); + case HTTP_TEMPORARY_REDIRECT: + return TStringBuf("307 Temporarily redirect"); + case HTTP_PERMANENT_REDIRECT: + return TStringBuf("308 Permanent redirect"); + + case HTTP_BAD_REQUEST: + return TStringBuf("400 Bad request"); + case HTTP_UNAUTHORIZED: + return TStringBuf("401 Unauthorized"); + case HTTP_PAYMENT_REQUIRED: + return TStringBuf("402 Payment required"); + case HTTP_FORBIDDEN: + return TStringBuf("403 Forbidden"); + case HTTP_NOT_FOUND: + return TStringBuf("404 Not found"); + case HTTP_METHOD_NOT_ALLOWED: + return TStringBuf("405 Method not allowed"); + case HTTP_NOT_ACCEPTABLE: + return TStringBuf("406 Not acceptable"); + case HTTP_PROXY_AUTHENTICATION_REQUIRED: + return TStringBuf("407 Proxy Authentication required"); + case 
HTTP_REQUEST_TIME_OUT: + return TStringBuf("408 Request time out"); + case HTTP_CONFLICT: + return TStringBuf("409 Conflict"); + case HTTP_GONE: + return TStringBuf("410 Gone"); + case HTTP_LENGTH_REQUIRED: + return TStringBuf("411 Length required"); + case HTTP_PRECONDITION_FAILED: + return TStringBuf("412 Precondition failed"); + case HTTP_REQUEST_ENTITY_TOO_LARGE: + return TStringBuf("413 Request entity too large"); + case HTTP_REQUEST_URI_TOO_LARGE: + return TStringBuf("414 Request uri too large"); + case HTTP_UNSUPPORTED_MEDIA_TYPE: + return TStringBuf("415 Unsupported media type"); + case HTTP_REQUESTED_RANGE_NOT_SATISFIABLE: + return TStringBuf("416 Requested Range Not Satisfiable"); + case HTTP_EXPECTATION_FAILED: + return TStringBuf("417 Expectation Failed"); + case HTTP_I_AM_A_TEAPOT: + return TStringBuf("418 I Am A Teapot"); + case HTTP_AUTHENTICATION_TIMEOUT: + return TStringBuf("419 Authentication Timeout"); + case HTTP_MISDIRECTED_REQUEST: + return TStringBuf("421 Misdirected Request"); + case HTTP_UNPROCESSABLE_ENTITY: + return TStringBuf("422 Unprocessable Entity"); + case HTTP_LOCKED: + return TStringBuf("423 Locked"); + case HTTP_FAILED_DEPENDENCY: + return TStringBuf("424 Failed Dependency"); + case HTTP_UNORDERED_COLLECTION: + return TStringBuf("425 Unordered Collection"); + case HTTP_UPGRADE_REQUIRED: + return TStringBuf("426 Upgrade Required"); + case HTTP_PRECONDITION_REQUIRED: + return TStringBuf("428 Precondition Required"); + case HTTP_TOO_MANY_REQUESTS: + return TStringBuf("429 Too Many Requests"); + case HTTP_REQUEST_HEADER_FIELDS_TOO_LARGE: + return TStringBuf("431 Request Header Fields Too Large"); + case HTTP_UNAVAILABLE_FOR_LEGAL_REASONS: + return TStringBuf("451 Unavailable For Legal Reason"); + + case HTTP_INTERNAL_SERVER_ERROR: + return TStringBuf("500 Internal server error"); + case HTTP_NOT_IMPLEMENTED: + return TStringBuf("501 Not implemented"); + case HTTP_BAD_GATEWAY: + return TStringBuf("502 Bad gateway"); + case 
HTTP_SERVICE_UNAVAILABLE: + return TStringBuf("503 Service unavailable"); + case HTTP_GATEWAY_TIME_OUT: + return TStringBuf("504 Gateway time out"); + case HTTP_HTTP_VERSION_NOT_SUPPORTED: + return TStringBuf("505 HTTP version not supported"); + case HTTP_VARIANT_ALSO_NEGOTIATES: + return TStringBuf("506 Variant also negotiates"); + case HTTP_INSUFFICIENT_STORAGE: + return TStringBuf("507 Insufficient storage"); + case HTTP_LOOP_DETECTED: + return TStringBuf("508 Loop Detected"); + case HTTP_BANDWIDTH_LIMIT_EXCEEDED: + return TStringBuf("509 Bandwidth Limit Exceeded"); + case HTTP_NOT_EXTENDED: + return TStringBuf("510 Not Extended"); + case HTTP_NETWORK_AUTHENTICATION_REQUIRED: + return TStringBuf("511 Network Authentication Required"); + case HTTP_UNASSIGNED_512: + return TStringBuf("512 Unassigned"); + + default: + return TStringBuf("000 Unknown HTTP code"); + } +} diff --git a/library/cpp/http/misc/httpcodes.h b/library/cpp/http/misc/httpcodes.h new file mode 100644 index 0000000000..cbfbaa1188 --- /dev/null +++ b/library/cpp/http/misc/httpcodes.h @@ -0,0 +1,94 @@ +#pragma once + +#include <util/generic/strbuf.h> + +enum HttpCodes { + HTTP_CONTINUE = 100, + HTTP_SWITCHING_PROTOCOLS = 101, + HTTP_PROCESSING = 102, + + HTTP_OK = 200, + HTTP_CREATED = 201, + HTTP_ACCEPTED = 202, + HTTP_NON_AUTHORITATIVE_INFORMATION = 203, + HTTP_NO_CONTENT = 204, + HTTP_RESET_CONTENT = 205, + HTTP_PARTIAL_CONTENT = 206, + HTTP_MULTI_STATUS = 207, + HTTP_ALREADY_REPORTED = 208, + HTTP_IM_USED = 226, + + HTTP_MULTIPLE_CHOICES = 300, + HTTP_MOVED_PERMANENTLY = 301, + HTTP_FOUND = 302, + HTTP_SEE_OTHER = 303, + HTTP_NOT_MODIFIED = 304, + HTTP_USE_PROXY = 305, + HTTP_TEMPORARY_REDIRECT = 307, + HTTP_PERMANENT_REDIRECT = 308, + + HTTP_BAD_REQUEST = 400, + HTTP_UNAUTHORIZED = 401, + HTTP_PAYMENT_REQUIRED = 402, + HTTP_FORBIDDEN = 403, + HTTP_NOT_FOUND = 404, + HTTP_METHOD_NOT_ALLOWED = 405, + HTTP_NOT_ACCEPTABLE = 406, + HTTP_PROXY_AUTHENTICATION_REQUIRED = 407, + HTTP_REQUEST_TIME_OUT = 
408, + HTTP_CONFLICT = 409, + HTTP_GONE = 410, + HTTP_LENGTH_REQUIRED = 411, + HTTP_PRECONDITION_FAILED = 412, + HTTP_REQUEST_ENTITY_TOO_LARGE = 413, + HTTP_REQUEST_URI_TOO_LARGE = 414, + HTTP_UNSUPPORTED_MEDIA_TYPE = 415, + HTTP_REQUESTED_RANGE_NOT_SATISFIABLE = 416, + HTTP_EXPECTATION_FAILED = 417, + HTTP_I_AM_A_TEAPOT = 418, + HTTP_AUTHENTICATION_TIMEOUT = 419, + HTTP_MISDIRECTED_REQUEST = 421, + HTTP_UNPROCESSABLE_ENTITY = 422, + HTTP_LOCKED = 423, + HTTP_FAILED_DEPENDENCY = 424, + HTTP_UNORDERED_COLLECTION = 425, + HTTP_UPGRADE_REQUIRED = 426, + HTTP_PRECONDITION_REQUIRED = 428, + HTTP_TOO_MANY_REQUESTS = 429, + HTTP_REQUEST_HEADER_FIELDS_TOO_LARGE = 431, + HTTP_UNAVAILABLE_FOR_LEGAL_REASONS = 451, + + HTTP_INTERNAL_SERVER_ERROR = 500, + HTTP_NOT_IMPLEMENTED = 501, + HTTP_BAD_GATEWAY = 502, + HTTP_SERVICE_UNAVAILABLE = 503, + HTTP_GATEWAY_TIME_OUT = 504, + HTTP_HTTP_VERSION_NOT_SUPPORTED = 505, + HTTP_VARIANT_ALSO_NEGOTIATES = 506, + HTTP_INSUFFICIENT_STORAGE = 507, + HTTP_LOOP_DETECTED = 508, + HTTP_BANDWIDTH_LIMIT_EXCEEDED = 509, + HTTP_NOT_EXTENDED = 510, + HTTP_NETWORK_AUTHENTICATION_REQUIRED = 511, + HTTP_UNASSIGNED_512 = 512, + + HTTP_CODE_MAX +}; + +TStringBuf HttpCodeStrEx(int code) noexcept; + +inline TStringBuf HttpCodeStr(int code) noexcept { + return HttpCodeStrEx(code).Skip(4); +} + +inline bool IsHttpCode(int code) noexcept { + return HttpCodeStrEx(code).data() != HttpCodeStrEx(0).data(); +} + +inline bool IsUserError(int code) noexcept { + return code >= 400 && code < 500; +} + +inline bool IsServerError(int code) noexcept { + return code >= 500; +} diff --git a/library/cpp/http/misc/httpdate.cpp b/library/cpp/http/misc/httpdate.cpp new file mode 100644 index 0000000000..4a3031bbf4 --- /dev/null +++ b/library/cpp/http/misc/httpdate.cpp @@ -0,0 +1,83 @@ +/*- +* Copyright 1997 Massachusetts Institute of Technology +* +* Permission to use, copy, modify, and distribute this software and +* its documentation for any purpose and without fee is hereby 
+* granted, provided that both the above copyright notice and this +* permission notice appear in all copies, that both the above +* copyright notice and this permission notice appear in all +* supporting documentation, and that the name of M.I.T. not be used +* in advertising or publicity pertaining to distribution of the +* software without specific, written prior permission. M.I.T. makes +* no representations about the suitability of this software for any +* purpose. It is provided "as is" without express or implied +* warranty. +* +* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS +* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, +* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT +* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. 
+*/ +#include <util/system/defaults.h> + +#include <sys/types.h> +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> + +#include <util/system/compat.h> /* stricmp */ +#include <util/system/yassert.h> +#include "httpdate.h" +#include <util/datetime/base.h> + +static const char *wkdays[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; + +static const char *months[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", + "Nov", "Dec" +}; + +int format_http_date(char buf[], size_t size, time_t when) { + struct tm tms; + GmTimeR(&when, &tms); + +#ifndef HTTP_DATE_ISO_8601 + return snprintf(buf, size, "%s, %02d %s %04d %02d:%02d:%02d GMT", + wkdays[tms.tm_wday], tms.tm_mday, months[tms.tm_mon], + tms.tm_year + 1900, tms.tm_hour, tms.tm_min, tms.tm_sec); +#else /* ISO 8601 */ + return snprintf(buf, size, "%04d%02d%02dT%02d%02d%02d+0000", + tms.tm_year + 1900, tms.tm_mon + 1, tms.tm_mday, + tms.tm_hour, tms.tm_min, tms.tm_sec); +#endif +} + +char* format_http_date(time_t when, char* buf, size_t buflen) { + const int len = format_http_date(buf, buflen, when); + + if (len == 0) { + return nullptr; + } + + Y_ASSERT(len > 0 && size_t(len) < buflen); + + return buf; +} + +TString FormatHttpDate(time_t when) { + char str[64] = {0}; + format_http_date(str, Y_ARRAY_SIZE(str), when); + return TString(str); +} diff --git a/library/cpp/http/misc/httpdate.h b/library/cpp/http/misc/httpdate.h new file mode 100644 index 0000000000..04876f38fe --- /dev/null +++ b/library/cpp/http/misc/httpdate.h @@ -0,0 +1,21 @@ +#pragma once + +#include <util/datetime/base.h> +#include <util/generic/string.h> + +#include <ctime> + +#define BAD_DATE ((time_t)-1) + +inline time_t parse_http_date(const TStringBuf& datestring) { + try { + return TInstant::ParseHttpDeprecated(datestring).TimeT(); + } catch (const TDateTimeParseException&) { + return BAD_DATE; + } +} + +int format_http_date(char buf[], size_t size, time_t when); 
+char* format_http_date(time_t when, char* buf, size_t len); + +TString FormatHttpDate(time_t when); diff --git a/library/cpp/http/misc/httpdate_ut.cpp b/library/cpp/http/misc/httpdate_ut.cpp new file mode 100644 index 0000000000..c1a0103501 --- /dev/null +++ b/library/cpp/http/misc/httpdate_ut.cpp @@ -0,0 +1,15 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "httpdate.h" + +Y_UNIT_TEST_SUITE(TestHttpDate) { + Y_UNIT_TEST(Test1) { + char buf1[100]; + char buf2[100]; + + UNIT_ASSERT((int)strlen(format_http_date(0, buf1, sizeof(buf1))) == format_http_date(buf2, sizeof(buf2), 0)); + } + Y_UNIT_TEST(Test2) { + UNIT_ASSERT_STRINGS_EQUAL(FormatHttpDate(1234567890), "Fri, 13 Feb 2009 23:31:30 GMT"); + } +} diff --git a/library/cpp/http/misc/httpreqdata.cpp b/library/cpp/http/misc/httpreqdata.cpp new file mode 100644 index 0000000000..f6951f68cd --- /dev/null +++ b/library/cpp/http/misc/httpreqdata.cpp @@ -0,0 +1,196 @@ +#include "httpreqdata.h" + +#include <util/stream/mem.h> + +TBaseServerRequestData::TBaseServerRequestData(SOCKET s) + : Addr(nullptr) + , Host() + , Port() + , Path(nullptr) + , Search(nullptr) + , SearchLength(0) + , Socket(s) + , BeginTime(MicroSeconds()) +{ +} + +TBaseServerRequestData::TBaseServerRequestData(const char* qs, SOCKET s) + : Addr(nullptr) + , Host() + , Port() + , Path(nullptr) + , Search((char*)qs) + , SearchLength(qs ? 
strlen(qs) : 0) + , OrigSearch(Search, SearchLength) + , Socket(s) + , BeginTime(MicroSeconds()) +{ +} + +// Appends str[0..length) to the current query string, inserting '&' between the two parts when neither side already supplies one. +// The combined, NUL-terminated result is kept in ModifiedQueryString and Search/SearchLength are repointed at it. +void TBaseServerRequestData::AppendQueryString(const char* str, size_t length) { + if (Y_UNLIKELY(Search)) { + Y_ASSERT(strlen(Search) == SearchLength); + // After an earlier call Search points into ModifiedQueryString itself, so snapshot the current query + // before that buffer is reserved and reassigned; otherwise we would read through a stale pointer. + const TString previous(Search, SearchLength); + ModifiedQueryString.Reserve(previous.size() + length + 2); + ModifiedQueryString.Assign(previous.data(), previous.size()); + if (previous.size() > 0 && previous[previous.size() - 1] != '&' && + length > 0 && str[0] != '&') { + ModifiedQueryString.Append('&'); + } + ModifiedQueryString.Append(str, length); + } else { + ModifiedQueryString.Reserve(length + 1); + ModifiedQueryString.Assign(str, length); + } + ModifiedQueryString.Append('\0'); + Search = ModifiedQueryString.data(); + SearchLength = ModifiedQueryString.size() - 1; // ignore terminator +} + +void TBaseServerRequestData::SetRemoteAddr(TStringBuf addr) { + TMemoryOutput out(AddrData, Y_ARRAY_SIZE(AddrData) - 1); + out.Write(addr.substr(0, Y_ARRAY_SIZE(AddrData) - 1)); + *out.Buf() = '\0'; + + Addr = AddrData; +} + +const char* TBaseServerRequestData::RemoteAddr() const { + if (!Addr) { + *AddrData = 0; + GetRemoteAddr(Socket, AddrData, sizeof(AddrData)); + Addr = AddrData; + } + + return Addr; +} + +const char* TBaseServerRequestData::HeaderIn(TStringBuf key) const { + auto it = HeadersIn_.find(key); + + if (it == HeadersIn_.end()) { + return nullptr; + } + + return it->second.data(); +} + +// Returns the n-th header (in container iteration order) formatted as "Name: value", or an empty string when n is out of range. +TString TBaseServerRequestData::HeaderByIndex(size_t n) const noexcept { + if (n >= HeadersCount()) { + // Explicit empty string instead of building a TString from a null pointer + return TString(); + } + + THttpHeadersContainer::const_iterator i = HeadersIn_.begin(); + + while (n) { + ++i; + --n; + } + + return TString(i->first) + TStringBuf(": ") + i->second; +} + +const char* TBaseServerRequestData::Environment(const char* key) const { + if (stricmp(key, "REMOTE_ADDR") == 0) { + const char* ip = HeaderIn("X-Real-IP"); + if (ip) + return ip; + return RemoteAddr(); + } else if (stricmp(key, "QUERY_STRING") == 0) { + return QueryString(); + } else if (stricmp(key, 
"SERVER_NAME") == 0) { + return ServerName().data(); + } else if (stricmp(key, "SERVER_PORT") == 0) { + return ServerPort().data(); + } else if (stricmp(key, "SCRIPT_NAME") == 0) { + return ScriptName(); + } + return nullptr; +} + +void TBaseServerRequestData::Clear() { + HeadersIn_.clear(); + Addr = Path = Search = nullptr; + OrigSearch = {}; + SearchLength = 0; + Host.clear(); + Port.clear(); + CurPage.remove(); + ParseBuf.Clear(); + BeginTime = MicroSeconds(); +} + +const char* TBaseServerRequestData::GetCurPage() const { + if (!CurPage && Host) { + CurPage = "http://"; + CurPage += Host; + if (Port) { + CurPage += ':'; + CurPage += Port; + } + CurPage += Path; + if (Search) { + CurPage += '?'; + CurPage += Search; + } + } + return CurPage.data(); +} + +bool TBaseServerRequestData::Parse(const char* origReq) { + size_t origReqLength = strlen(origReq); + ParseBuf.Assign(origReq, origReqLength + 1); + char* req = ParseBuf.Data(); + + while (*req == ' ' || *req == '\t') + req++; + if (*req != '/') + return false; // we are not a proxy + while (req[1] == '/') // remove redundant slashes + req++; + + // detect url end (can contain some garbage after whitespace, e.g. 'HTTP 1.1') + char* urlEnd = req; + while (*urlEnd && *urlEnd != ' ' && *urlEnd != '\t') + urlEnd++; + if (*urlEnd) + *urlEnd = 0; + + // cut fragment if exists + char* fragment = strchr(req, '#'); + if (fragment) + *fragment = 0; // ignore fragment + else + fragment = urlEnd; + Path = req; + + // calculate Search length without additional strlen-ing + Search = strchr(Path, '?'); + if (Search) { + *Search++ = 0; + ptrdiff_t delta = fragment - Search; + // indeed, second case is a parse error + SearchLength = (delta >= 0) ? 
delta : (urlEnd - Search); + Y_ASSERT(strlen(Search) == SearchLength); + } else { + SearchLength = 0; + } + OrigSearch = {Search, SearchLength}; + + return true; +} + +void TBaseServerRequestData::AddHeader(const TString& name, const TString& value) { + HeadersIn_[name] = value; + + if (stricmp(name.data(), "Host") == 0) { + size_t hostLen = strcspn(value.data(), ":"); + if (value[hostLen] == ':') + Port = value.substr(hostLen + 1); + Host = value.substr(0, hostLen); + } +} + +void TBaseServerRequestData::SetPath(const TString& path) { + PathStorage = TBuffer(path.data(), path.size() + 1); + Path = PathStorage.Data(); +} diff --git a/library/cpp/http/misc/httpreqdata.h b/library/cpp/http/misc/httpreqdata.h new file mode 100644 index 0000000000..16e59c4d78 --- /dev/null +++ b/library/cpp/http/misc/httpreqdata.h @@ -0,0 +1,125 @@ +#pragma once + +#include <library/cpp/digest/lower_case/hash_ops.h> + +#include <util/str_stl.h> + +#include <util/system/defaults.h> +#include <util/string/cast.h> +#include <library/cpp/cgiparam/cgiparam.h> +#include <util/network/address.h> +#include <util/network/socket.h> +#include <util/generic/hash.h> +#include <util/system/yassert.h> +#include <util/generic/string.h> +#include <util/datetime/base.h> +#include <util/generic/buffer.h> + +using THttpHeadersContainer = THashMap<TString, TString, TCIOps, TCIOps>; + +class TBaseServerRequestData { +public: + TBaseServerRequestData(SOCKET s = INVALID_SOCKET); + TBaseServerRequestData(const char* qs, SOCKET s = INVALID_SOCKET); + + void SetHost(const TString& host, ui16 port) { + Host = host; + Port = ToString(port); + } + + const TString& ServerName() const { + return Host; + } + + NAddr::IRemoteAddrPtr ServerAddress() const { + return NAddr::GetSockAddr(Socket); + } + + const TString& ServerPort() const { + return Port; + } + + const char* ScriptName() const { + return Path; + } + + const char* QueryString() const { + return Search; + } + + TStringBuf QueryStringBuf() const { + return 
TStringBuf(Search, SearchLength); + } + + TStringBuf OrigQueryStringBuf() const { + return OrigSearch; + } + + void AppendQueryString(const char* str, size_t length); + const char* RemoteAddr() const; + void SetRemoteAddr(TStringBuf addr); + const char* HeaderIn(TStringBuf key) const; + + const THttpHeadersContainer& HeadersIn() const { + return HeadersIn_; + } + + inline size_t HeadersCount() const noexcept { + return HeadersIn_.size(); + } + + TString HeaderByIndex(size_t n) const noexcept; + const char* Environment(const char* key) const; + + void Clear(); + + void SetSocket(SOCKET s) noexcept { + Socket = s; + } + + ui64 RequestBeginTime() const noexcept { + return BeginTime; + } + + void SetPath(const TString& path); + const char* GetCurPage() const; + bool Parse(const char* req); + void AddHeader(const TString& name, const TString& value); + +private: + TBuffer PathStorage; + mutable char* Addr; + TString Host; + TString Port; + char* Path; + char* Search; + size_t SearchLength; // length of Search + TStringBuf OrigSearch; + THttpHeadersContainer HeadersIn_; + mutable char AddrData[INET6_ADDRSTRLEN]; + SOCKET Socket; + ui64 BeginTime; + mutable TString CurPage; + TBuffer ParseBuf; + TBuffer ModifiedQueryString; +}; + +class TServerRequestData: public TBaseServerRequestData { +public: + TServerRequestData(SOCKET s = INVALID_SOCKET) + : TBaseServerRequestData(s) + { + } + TServerRequestData(const char* qs, SOCKET s = INVALID_SOCKET) + : TBaseServerRequestData(qs, s) + { + Scan(); + } + + void Scan() { + CgiParam.Scan(QueryStringBuf()); + } + +public: + TCgiParameters CgiParam; +}; diff --git a/library/cpp/http/misc/httpreqdata_ut.cpp b/library/cpp/http/misc/httpreqdata_ut.cpp new file mode 100644 index 0000000000..e7f16ef27c --- /dev/null +++ b/library/cpp/http/misc/httpreqdata_ut.cpp @@ -0,0 +1,154 @@ +#include "httpreqdata.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TRequestServerDataTest) { + Y_UNIT_TEST(Headers) { + 
TServerRequestData sd; + + sd.AddHeader("x-xx", "y-yy"); + sd.AddHeader("x-Xx", "y-yy"); + + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 1); + + sd.AddHeader("x-XxX", "y-yyy"); + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("X-XX")), TStringBuf("y-yy")); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("X-XXX")), TStringBuf("y-yyy")); + } + + Y_UNIT_TEST(ComplexHeaders) { + TServerRequestData sd; + sd.SetHost("zzz", 1); + + sd.AddHeader("x-Xx", "y-yy"); + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 1); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("X-XX")), TStringBuf("y-yy")); + + sd.AddHeader("x-Xz", "y-yy"); + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 2); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("X-Xz")), TStringBuf("y-yy")); + + UNIT_ASSERT_VALUES_EQUAL(sd.ServerName(), "zzz"); + UNIT_ASSERT_VALUES_EQUAL(sd.ServerPort(), "1"); + sd.AddHeader("Host", "1234"); + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 3); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("Host")), TStringBuf("1234")); + UNIT_ASSERT_VALUES_EQUAL(sd.ServerName(), "1234"); + sd.AddHeader("Host", "12345:678"); + UNIT_ASSERT_VALUES_EQUAL(sd.HeadersCount(), 3); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(sd.HeaderIn("Host")), TStringBuf("12345:678")); + UNIT_ASSERT_VALUES_EQUAL(sd.ServerName(), "12345"); + UNIT_ASSERT_VALUES_EQUAL(sd.ServerPort(), "678"); + } + + Y_UNIT_TEST(ParseScan) { + TServerRequestData rd; + + // Parse accepts only a host-less request path, so an absolute URL is rejected + UNIT_ASSERT(!rd.Parse(" http://yandex.ru/yandsearch?&gta=fake&haha=da HTTP 1.1 OK")); + + // This should work + UNIT_ASSERT(rd.Parse(" /yandsearch?&gta=fake&haha=da HTTP 1.1 OK")); + + // The raw query string keeps its leading '&'; the parameters are split out only by Scan() + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), "&gta=fake&haha=da"); + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), rd.OrigQueryStringBuf()); + + rd.Scan(); + UNIT_ASSERT(rd.CgiParam.Has("gta", "fake")); + UNIT_ASSERT(rd.CgiParam.Has("haha", "da")); + UNIT_ASSERT(!rd.CgiParam.Has("no-param")); + + rd.Clear(); + } 
+ + Y_UNIT_TEST(Ctor) { + const TString qs("gta=fake&haha=da"); + TServerRequestData rd(qs.c_str()); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), qs); + UNIT_ASSERT_STRINGS_EQUAL(rd.OrigQueryStringBuf(), qs); + + UNIT_ASSERT(rd.CgiParam.Has("gta")); + UNIT_ASSERT(rd.CgiParam.Has("haha")); + UNIT_ASSERT(!rd.CgiParam.Has("no-param")); + } + + Y_UNIT_TEST(HashCut) { + const TString qs("&gta=fake&haha=da"); + // Everything from '#' onwards is a fragment and must not leak into the query string + const TString header = " /yandsearch?" + qs + "#&uberParam=yes&q=? HTTP 1.1 OK"; + + TServerRequestData rd; + rd.Parse(header.c_str()); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), qs); + UNIT_ASSERT_STRINGS_EQUAL(rd.OrigQueryStringBuf(), qs); + + rd.Scan(); + UNIT_ASSERT(rd.CgiParam.Has("gta")); + UNIT_ASSERT(rd.CgiParam.Has("haha")); + UNIT_ASSERT(!rd.CgiParam.Has("uberParam")); + } + + Y_UNIT_TEST(MisplacedHashCut) { + TServerRequestData rd; + // The '#' precedes the '?', so the whole tail is a fragment and the query string is empty + rd.Parse(" /y#ndsearch?&gta=fake&haha=da&uberParam=yes&q=? HTTP 1.1 OK"); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), ""); + UNIT_ASSERT_STRINGS_EQUAL(rd.OrigQueryStringBuf(), ""); + + rd.Scan(); + UNIT_ASSERT(rd.CgiParam.empty()); + } + + Y_UNIT_TEST(CornerCase) { + TServerRequestData rd; + rd.Parse(" /yandsearch?#"); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), ""); + UNIT_ASSERT_STRINGS_EQUAL(rd.OrigQueryStringBuf(), ""); + + rd.Scan(); + UNIT_ASSERT(rd.CgiParam.empty()); + } + + Y_UNIT_TEST(AppendQueryString) { + const TString qs("gta=fake&haha=da"); + TServerRequestData rd(qs.c_str()); + + UNIT_ASSERT(rd.CgiParam.Has("gta", "fake")); + UNIT_ASSERT(rd.CgiParam.Has("haha", "da")); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), qs); + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), rd.OrigQueryStringBuf()); + + // The appendix carries the "gta" key twice, as two '&'-separated parameters + constexpr TStringBuf appendix = "gta=true&gta=new"; + rd.AppendQueryString(appendix.data(), appendix.size()); + + UNIT_ASSERT_STRINGS_EQUAL(rd.QueryStringBuf(), qs + '&' + appendix); + UNIT_ASSERT_STRINGS_EQUAL(rd.OrigQueryStringBuf(), qs); + + rd.Scan(); + + 
UNIT_ASSERT(rd.CgiParam.Has("gta", "true")); + UNIT_ASSERT(rd.CgiParam.Has("gta", "new")); + } + + Y_UNIT_TEST(SetRemoteAddrSimple) { + static const TString TEST = "abacaba.search.yandex.net"; + + TServerRequestData rd; + rd.SetRemoteAddr(TEST); + UNIT_ASSERT_STRINGS_EQUAL(TEST, rd.RemoteAddr()); + } + + Y_UNIT_TEST(SetRemoteAddrRandom) { + for (size_t size = 0; size < 2 * INET6_ADDRSTRLEN; ++size) { + const TString test = NUnitTest::RandomString(size, size); + TServerRequestData rd; + rd.SetRemoteAddr(test); + UNIT_ASSERT_STRINGS_EQUAL(test.substr(0, INET6_ADDRSTRLEN - 1), rd.RemoteAddr()); + } + } + +} // TRequestServerDataTest diff --git a/library/cpp/http/misc/parsed_request.cpp b/library/cpp/http/misc/parsed_request.cpp new file mode 100644 index 0000000000..e332a24e91 --- /dev/null +++ b/library/cpp/http/misc/parsed_request.cpp @@ -0,0 +1,32 @@ +#include "parsed_request.h" + +#include <util/string/strip.h> +#include <util/generic/yexception.h> +#include <util/string/cast.h> + +static inline TStringBuf StripLeft(const TStringBuf& s) noexcept { + const char* b = s.begin(); + const char* e = s.end(); + + StripRangeBegin(b, e); + + return TStringBuf(b, e); +} + +TParsedHttpRequest::TParsedHttpRequest(const TStringBuf& str) { + TStringBuf tmp; + + if (!StripLeft(str).TrySplit(' ', Method, tmp)) { + ythrow yexception() << "bad request(" << ToString(str).Quote() << ")"; + } + + if (!StripLeft(tmp).TrySplit(' ', Request, Proto)) { + ythrow yexception() << "bad request(" << ToString(str).Quote() << ")"; + } + + Proto = StripLeft(Proto); +} + +TParsedHttpLocation::TParsedHttpLocation(const TStringBuf& req) { + req.Split('?', Path, Cgi); +} diff --git a/library/cpp/http/misc/parsed_request.h b/library/cpp/http/misc/parsed_request.h new file mode 100644 index 0000000000..d4df705495 --- /dev/null +++ b/library/cpp/http/misc/parsed_request.h @@ -0,0 +1,26 @@ +#pragma once + +#include <util/generic/strbuf.h> + +struct TParsedHttpRequest { + TParsedHttpRequest(const 
TStringBuf& str); + + TStringBuf Method; + TStringBuf Request; + TStringBuf Proto; +}; + +struct TParsedHttpLocation { + TParsedHttpLocation(const TStringBuf& req); + + TStringBuf Path; + TStringBuf Cgi; +}; + +struct TParsedHttpFull: public TParsedHttpRequest, public TParsedHttpLocation { + inline TParsedHttpFull(const TStringBuf& line) + : TParsedHttpRequest(line) + , TParsedHttpLocation(Request) + { + } +}; diff --git a/library/cpp/http/misc/parsed_request_ut.cpp b/library/cpp/http/misc/parsed_request_ut.cpp new file mode 100644 index 0000000000..da6d95c6ab --- /dev/null +++ b/library/cpp/http/misc/parsed_request_ut.cpp @@ -0,0 +1,28 @@ +#include "parsed_request.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(THttpParse) { + Y_UNIT_TEST(TestParse) { + TParsedHttpFull h("GET /yandsearch?text=nokia HTTP/1.1"); + + UNIT_ASSERT_EQUAL(h.Method, "GET"); + UNIT_ASSERT_EQUAL(h.Request, "/yandsearch?text=nokia"); + UNIT_ASSERT_EQUAL(h.Proto, "HTTP/1.1"); + + UNIT_ASSERT_EQUAL(h.Path, "/yandsearch"); + UNIT_ASSERT_EQUAL(h.Cgi, "text=nokia"); + } + + Y_UNIT_TEST(TestError) { + bool wasError = false; + + try { + TParsedHttpFull("GET /yandsearch?text=nokiaHTTP/1.1"); + } catch (...) 
{ + wasError = true; + } + + UNIT_ASSERT(wasError); + } +} diff --git a/library/cpp/http/misc/ut/ya.make b/library/cpp/http/misc/ut/ya.make new file mode 100644 index 0000000000..f4bdd35662 --- /dev/null +++ b/library/cpp/http/misc/ut/ya.make @@ -0,0 +1,11 @@ +UNITTEST_FOR(library/cpp/http/misc) + +OWNER(g:util) + +SRCS( + httpdate_ut.cpp + httpreqdata_ut.cpp + parsed_request_ut.cpp +) + +END() diff --git a/library/cpp/http/misc/ya.make b/library/cpp/http/misc/ya.make new file mode 100644 index 0000000000..fceb3cf79c --- /dev/null +++ b/library/cpp/http/misc/ya.make @@ -0,0 +1,24 @@ +LIBRARY() + +OWNER( + g:util + mvel +) + +GENERATE_ENUM_SERIALIZATION(httpcodes.h) + +SRCS( + httpcodes.cpp + httpdate.cpp + httpreqdata.cpp + parsed_request.cpp +) + +PEERDIR( + library/cpp/cgiparam + library/cpp/digest/lower_case +) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/library/cpp/http/server/conn.cpp b/library/cpp/http/server/conn.cpp new file mode 100644 index 0000000000..38a76c4c30 --- /dev/null +++ b/library/cpp/http/server/conn.cpp @@ -0,0 +1,69 @@ +#include "conn.h" + +#include <util/network/socket.h> +#include <util/stream/buffered.h> + +class THttpServerConn::TImpl { +public: + inline TImpl(const TSocket& s, size_t outputBufferSize) + : S_(s) + , SI_(S_) + , SO_(S_) + , BO_(&SO_, outputBufferSize) + , HI_(&SI_) + , HO_(&BO_, &HI_) + { + } + + inline ~TImpl() { + } + + inline THttpInput* Input() noexcept { + return &HI_; + } + + inline THttpOutput* Output() noexcept { + return &HO_; + } + + inline void Reset() { + if (S_ != INVALID_SOCKET) { + // send RST packet to client + S_.SetLinger(true, 0); + S_.Close(); + } + } + +private: + TSocket S_; + TSocketInput SI_; + TSocketOutput SO_; + TBufferedOutput BO_; + THttpInput HI_; + THttpOutput HO_; +}; + +THttpServerConn::THttpServerConn(const TSocket& s) + : THttpServerConn(s, s.MaximumTransferUnit()) +{ +} + +THttpServerConn::THttpServerConn(const TSocket& s, size_t outputBufferSize) + : Impl_(new TImpl(s, 
outputBufferSize)) +{ +} + +THttpServerConn::~THttpServerConn() { +} + +THttpInput* THttpServerConn::Input() noexcept { + return Impl_->Input(); +} + +THttpOutput* THttpServerConn::Output() noexcept { + return Impl_->Output(); +} + +void THttpServerConn::Reset() { + return Impl_->Reset(); +} diff --git a/library/cpp/http/server/conn.h b/library/cpp/http/server/conn.h new file mode 100644 index 0000000000..3aa5329af4 --- /dev/null +++ b/library/cpp/http/server/conn.h @@ -0,0 +1,37 @@ +#pragma once + +#include <library/cpp/http/io/stream.h> +#include <util/generic/ptr.h> + +class TSocket; + +/// Потоки ввода/вывода для получения запросов и отправки ответов HTTP-сервера. +class THttpServerConn { +public: + explicit THttpServerConn(const TSocket& s); + THttpServerConn(const TSocket& s, size_t outputBufferSize); + ~THttpServerConn(); + + THttpInput* Input() noexcept; + THttpOutput* Output() noexcept; + + inline const THttpInput* Input() const noexcept { + return const_cast<THttpServerConn*>(this)->Input(); + } + + inline const THttpOutput* Output() const noexcept { + return const_cast<THttpServerConn*>(this)->Output(); + } + + /// Проверяет, можно ли установить режим, при котором соединение с сервером + /// не завершается после окончания транзакции. 
+ inline bool CanBeKeepAlive() const noexcept { + return Output()->CanBeKeepAlive(); + } + + void Reset(); + +private: + class TImpl; + THolder<TImpl> Impl_; +}; diff --git a/library/cpp/http/server/http.cpp b/library/cpp/http/server/http.cpp new file mode 100644 index 0000000000..128583bdd7 --- /dev/null +++ b/library/cpp/http/server/http.cpp @@ -0,0 +1,843 @@ +#include "http.h" +#include "http_ex.h" + +#include <library/cpp/threading/equeue/equeue.h> + +#include <util/generic/buffer.h> +#include <util/generic/cast.h> +#include <util/generic/intrlist.h> +#include <util/generic/yexception.h> +#include <util/network/address.h> +#include <util/network/socket.h> +#include <util/network/poller.h> +#include <util/system/atomic.h> +#include <util/system/compat.h> // stricmp, strnicmp, strlwr, strupr, stpcpy +#include <util/system/defaults.h> +#include <util/system/event.h> +#include <util/system/mutex.h> +#include <util/system/pipe.h> +#include <util/system/thread.h> +#include <util/thread/factory.h> + +#include <cerrno> +#include <cstring> +#include <ctime> + +#include <sys/stat.h> +#include <sys/types.h> + +using namespace NAddr; + +namespace { + class IPollAble { + public: + inline IPollAble() noexcept { + } + + virtual ~IPollAble() { + } + + virtual void OnPollEvent(TInstant now) = 0; + }; + + struct TShouldStop { + }; + + struct TWakeupPollAble: public IPollAble { + void OnPollEvent(TInstant) override { + throw TShouldStop(); + } + }; +} + +class TClientConnection: public IPollAble, public TIntrusiveListItem<TClientConnection> { +public: + TClientConnection(const TSocket& s, THttpServer::TImpl* serv, NAddr::IRemoteAddrRef listenerSockAddrRef); + ~TClientConnection() override; + + void OnPollEvent(TInstant now) override; + + inline void Activate(TInstant now) noexcept; + inline void DeActivate(); + inline void Reject(); + +public: + TSocket Socket_; + NAddr::IRemoteAddrRef ListenerSockAddrRef_; + THttpServer::TImpl* HttpServ_ = nullptr; + bool Reject_ = false; + 
TInstant LastUsed; + TInstant AcceptMoment; + size_t ReceivedRequests = 0; +}; + +class THttpServer::TImpl { +public: + class TConnections { + public: + inline TConnections(TSocketPoller* poller, const THttpServerOptions& options) + : Poller_(poller) + , Options(options) + { + } + + inline ~TConnections() { + } + + inline void Add(TClientConnection* c) noexcept { + TGuard<TMutex> g(Mutex_); + + Conns_.PushBack(c); + Poller_->WaitRead(c->Socket_, (void*)static_cast<const IPollAble*>(c)); + } + + inline void Erase(TClientConnection* c, TInstant now) noexcept { + TGuard<TMutex> g(Mutex_); + EraseUnsafe(c); + if (Options.ExpirationTimeout > TDuration::Zero()) { + TryRemovingUnsafe(now - Options.ExpirationTimeout); + } + } + + inline void Clear() noexcept { + TGuard<TMutex> g(Mutex_); + + Conns_.Clear(); + } + + inline bool RemoveOld(TInstant border) noexcept { + TGuard<TMutex> g(Mutex_); + return TryRemovingUnsafe(border); + } + + bool TryRemovingUnsafe(TInstant border) noexcept { + if (Conns_.Empty()) { + return false; + } + TClientConnection* c = &*(Conns_.Begin()); + if (c->LastUsed > border) { + return false; + } + EraseUnsafe(c); + delete c; + return true; + } + + void EraseUnsafe(TClientConnection* c) noexcept { + Poller_->Unwait(c->Socket_); + c->Unlink(); + } + + public: + TMutex Mutex_; + TIntrusiveListWithAutoDelete<TClientConnection, TDelete> Conns_; + TSocketPoller* Poller_ = nullptr; + const THttpServerOptions& Options; + }; + + static void* ListenSocketFunction(void* param) { + try { + ((TImpl*)param)->ListenSocket(); + } catch (...) 
{ + + } + + return nullptr; + } + + TAutoPtr<TClientRequest> CreateRequest(TAutoPtr<TClientConnection> c) { + THolder<TClientRequest> obj(Cb_->CreateClient()); + + obj->Conn_.Reset(c.Release()); + + return obj; + } + + void AddRequestFromSocket(const TSocket& s, TInstant now, NAddr::IRemoteAddrRef listenerSockAddrRef) { + if (MaxRequestsReached()) { + Cb_->OnMaxConn(); + bool wasRemoved = Connections->RemoveOld(TInstant::Max()); + if (!wasRemoved && Options_.RejectExcessConnections) { + (new TClientConnection(s, this, listenerSockAddrRef))->Reject(); + return; + } + } + + auto connection = new TClientConnection(s, this, listenerSockAddrRef); + connection->LastUsed = now; + connection->DeActivate(); + } + + void SaveErrorCode() { + ErrorCode = WSAGetLastError(); + } + + int GetErrorCode() const { + return ErrorCode; + } + + const char* GetError() const { + return LastSystemErrorText(ErrorCode); + } + + bool Start() { + Poller.Reset(new TSocketPoller()); + Connections.Reset(new TConnections(Poller.Get(), Options_)); + + // Start the listener thread + ListenerRunningOK = false; + + // throws on error + TPipeHandle::Pipe(ListenWakeupReadFd, ListenWakeupWriteFd); + + SetNonBlock(ListenWakeupWriteFd, true); + SetNonBlock(ListenWakeupReadFd, true); + + Poller->WaitRead(ListenWakeupReadFd, &WakeupPollAble); + + ListenStartEvent.Reset(); + try { + ListenThread.Reset(new TThread(ListenSocketFunction, this)); + ListenThread->Start(); + } catch (const yexception&) { + SaveErrorCode(); + return false; + } + + // Wait until the thread has completely started and return the success indicator + ListenStartEvent.Wait(); + + return ListenerRunningOK; + } + + void Wait() { + Cb_->OnWait(); + TGuard<TMutex> g(StopMutex); + if (ListenThread) { + ListenThread->Join(); + ListenThread.Reset(nullptr); + } + } + + void Stop() { + Shutdown(); + + TGuard<TMutex> g(StopMutex); + if (ListenThread) { + ListenThread->Join(); + ListenThread.Reset(nullptr); + } + + while (ConnectionCount) { + 
usleep(10000); + Connections->Clear(); + } + + Connections.Destroy(); + Poller.Destroy(); + } + + void Shutdown() { + ListenWakeupWriteFd.Write("", 1); + // ignore result + } + + void AddRequest(TAutoPtr<TClientRequest> req, bool fail) { + struct TFailRequest: public THttpClientRequestEx { + inline TFailRequest(TAutoPtr<TClientRequest> parent) { + Conn_.Reset(parent->Conn_.Release()); + HttpConn_.Reset(parent->HttpConn_.Release()); + } + + bool Reply(void*) override { + if (!ProcessHeaders()) { + return true; + } + + ProcessFailRequest(0); + return true; + } + }; + + if (!fail && Requests->Add(req.Get())) { + Y_UNUSED(req.Release()); + } else { + req = new TFailRequest(req); + + if (FailRequests->Add(req.Get())) { + Y_UNUSED(req.Release()); + } else { + Cb_->OnFailRequest(-1); + } + } + } + + size_t GetRequestQueueSize() const { + return Requests->Size(); + } + + size_t GetFailQueueSize() const { + return FailRequests->Size(); + } + + const IThreadPool& GetRequestQueue() const { + return *Requests; + } + + const IThreadPool& GetFailQueue() const { + return *FailRequests; + } + + class TListenSocket: public IPollAble, public TIntrusiveListItem<TListenSocket> { + public: + inline TListenSocket(const TSocket& s, TImpl* parent) + : S_(s) + , Server_(parent) + , SockAddrRef_(GetSockAddr(S_)) + { + } + + ~TListenSocket() override { + } + + void OnPollEvent(TInstant) override { + SOCKET s = ::accept(S_, nullptr, nullptr); + + if (s == INVALID_SOCKET) { + ythrow yexception() << "accept: " << LastSystemErrorText(); + } + + Server_->AddRequestFromSocket(s, TInstant::Now(), SockAddrRef_); + } + + SOCKET GetSocket() const noexcept { + return S_; + } + + private: + TSocket S_; + TImpl* Server_ = nullptr; + NAddr::IRemoteAddrRef SockAddrRef_; + }; + + void ListenSocket() { + TThread::SetCurrentThreadName(Options_.ListenThreadName.c_str()); + + ErrorCode = 0; + TIntrusiveListWithAutoDelete<TListenSocket, TDelete> Reqs; + + std::function<void(TSocket)> callback = [&](TSocket 
socket) { + THolder<TListenSocket> ls(new TListenSocket(socket, this)); + Poller->WaitRead(socket, static_cast<IPollAble*>(ls.Get())); + Reqs.PushBack(ls.Release()); + }; + bool addressesBound = TryToBindAddresses(Options_, &callback); + if (!addressesBound) { + SaveErrorCode(); + ListenStartEvent.Signal(); + + return; + } + + Requests->Start(Options_.nThreads, Options_.MaxQueueSize); + FailRequests->Start(Options_.nFThreads, Options_.MaxFQueueSize); + Cb_->OnListenStart(); + ListenerRunningOK = true; + ListenStartEvent.Signal(); + + TVector<void*> events; + events.resize(1); + + TInstant now = TInstant::Now(); + for (;;) { + try { + const TInstant deadline = Options_.PollTimeout == TDuration::Zero() ? TInstant::Max() : now + Options_.PollTimeout; + const size_t ret = Poller->WaitD(events.data(), events.size(), deadline); + + now = TInstant::Now(); + for (size_t i = 0; i < ret; ++i) { + ((IPollAble*)events[i])->OnPollEvent(now); + } + + if (ret == 0 && Options_.ExpirationTimeout > TDuration::Zero()) { + Connections->RemoveOld(now - Options_.ExpirationTimeout); + } + + // When MaxConnections is limited or ExpirationTimeout is set, OnPollEvent can call + // RemoveOld and destroy other IPollAble* objects in the + // poller. Thus in this case we can safely process only one + // event from the poller at a time. + if (!Options_.MaxConnections && Options_.ExpirationTimeout == TDuration::Zero()) { + if (ret >= events.size()) { + events.resize(ret * 2); + } + } + } catch (const TShouldStop&) { + break; + } catch (...) 
{ + Cb_->OnException(); + } + } + + while (!Reqs.Empty()) { + THolder<TListenSocket> ls(Reqs.PopFront()); + + Poller->Unwait(ls->GetSocket()); + } + + Requests->Stop(); + FailRequests->Stop(); + Cb_->OnListenStop(); + } + + void RestartRequestThreads(ui32 nTh, ui32 maxQS) { + Requests->Stop(); + Options_.nThreads = nTh; + Options_.MaxQueueSize = maxQS; + Requests->Start(Options_.nThreads, Options_.MaxQueueSize); + } + + TImpl(THttpServer* parent, ICallBack* cb, TMtpQueueRef mainWorkers, TMtpQueueRef failWorkers, const TOptions& options_) + : Requests(mainWorkers) + , FailRequests(failWorkers) + , Options_(options_) + , Cb_(cb) + , Parent_(parent) + { + } + + TImpl(THttpServer* parent, ICallBack* cb, const TOptions& options, IThreadFactory* factory) + : TImpl( + parent, + cb, + MakeThreadPool<TSimpleThreadPool>(factory, options.UseElasticQueues, cb, options.RequestsThreadName), + MakeThreadPool<TThreadPool>(factory, options.UseElasticQueues, nullptr, options.FailRequestsThreadName), + options) { + } + + ~TImpl() { + try { + Stop(); + } catch (...) 
{ + } + } + + inline const TOptions& Options() const noexcept { + return Options_; + } + + inline void DecreaseConnections() noexcept { + AtomicDecrement(ConnectionCount); + } + + inline void IncreaseConnections() noexcept { + AtomicIncrement(ConnectionCount); + } + + inline i64 GetClientCount() const { + return AtomicGet(ConnectionCount); + } + + inline bool MaxRequestsReached() const { + return Options_.MaxConnections && ((size_t)GetClientCount() >= Options_.MaxConnections); + } + + THolder<TThread> ListenThread; + TPipeHandle ListenWakeupReadFd; + TPipeHandle ListenWakeupWriteFd; + TSystemEvent ListenStartEvent; + TMtpQueueRef Requests; + TMtpQueueRef FailRequests; + TAtomic ConnectionCount = 0; + THolder<TSocketPoller> Poller; + THolder<TConnections> Connections; + bool ListenerRunningOK = false; + int ErrorCode = 0; + TOptions Options_; + ICallBack* Cb_ = nullptr; + THttpServer* Parent_ = nullptr; + TWakeupPollAble WakeupPollAble; + TMutex StopMutex; + +private: + template <class TThreadPool_> + static THolder<IThreadPool> MakeThreadPool(IThreadFactory* factory, bool elastic, ICallBack* callback = nullptr, const TString& threadName = {}) { + if (!factory) { + factory = SystemThreadFactory(); + } + + THolder<IThreadPool> pool; + const auto params = IThreadPool::TParams().SetFactory(factory).SetThreadName(threadName); + if (callback) { + pool = MakeHolder<TThreadPoolBinder<TThreadPool_, THttpServer::ICallBack>>(callback, params); + } else { + pool = MakeHolder<TThreadPool_>(params); + } + + if (elastic) { + pool = MakeHolder<TElasticQueue>(std::move(pool)); + } + + return pool; + } +}; + +THttpServer::THttpServer(ICallBack* cb, const TOptions& options, IThreadFactory* pool) + : Impl_(new TImpl(this, cb, options, pool)) +{ +} + +THttpServer::THttpServer(ICallBack* cb, TMtpQueueRef mainWorkers, TMtpQueueRef failWorkers, const TOptions& options) + : Impl_(new TImpl(this, cb, mainWorkers, failWorkers, options)) +{ +} + +THttpServer::~THttpServer() { +} + +i64 
THttpServer::GetClientCount() const { + return Impl_->GetClientCount(); +} + +bool THttpServer::Start() { + return Impl_->Start(); +} + +void THttpServer::Stop() { + Impl_->Stop(); +} + +void THttpServer::Shutdown() { + Impl_->Shutdown(); +} + +void THttpServer::Wait() { + Impl_->Wait(); +} + +int THttpServer::GetErrorCode() { + return Impl_->GetErrorCode(); +} + +const char* THttpServer::GetError() { + return Impl_->GetError(); +} + +void THttpServer::RestartRequestThreads(ui32 n, ui32 queue) { + Impl_->RestartRequestThreads(n, queue); +} + +const THttpServer::TOptions& THttpServer::Options() const noexcept { + return Impl_->Options(); +} + +size_t THttpServer::GetRequestQueueSize() const { + return Impl_->GetRequestQueueSize(); +} + +size_t THttpServer::GetFailQueueSize() const { + return Impl_->GetFailQueueSize(); +} + +const IThreadPool& THttpServer::GetRequestQueue() const { + return Impl_->GetRequestQueue(); +} + +const IThreadPool& THttpServer::GetFailQueue() const { + return Impl_->GetFailQueue(); +} + +bool THttpServer::MaxRequestsReached() const { + return Impl_->MaxRequestsReached(); +} + +TClientConnection::TClientConnection(const TSocket& s, THttpServer::TImpl* serv, NAddr::IRemoteAddrRef listenerSockAddrRef) + : Socket_(s) + , ListenerSockAddrRef_(listenerSockAddrRef) + , HttpServ_(serv) +{ + SetNoDelay(Socket_, true); + + const TDuration& clientTimeout = HttpServ_->Options().ClientTimeout; + if (clientTimeout != TDuration::Zero()) { + SetSocketTimeout(Socket_, (long)clientTimeout.Seconds(), clientTimeout.MilliSecondsOfSecond()); + } + + HttpServ_->IncreaseConnections(); +} + +TClientConnection::~TClientConnection() { + HttpServ_->DecreaseConnections(); +} + +void TClientConnection::OnPollEvent(TInstant now) { + THolder<TClientConnection> this_(this); + Activate(now); + + { + char tmp[1]; + + if (::recv(Socket_, tmp, 1, MSG_PEEK) < 1) { + /* + * We can received a FIN so our socket was moved to + * TCP_CLOSE_WAIT state. 
Check it before adding work + * for this socket. + */ + + return; + } + } + + THolder<TClientRequest> obj(HttpServ_->CreateRequest(this_)); + AcceptMoment = now; + + HttpServ_->AddRequest(obj, Reject_); +} + +void TClientConnection::Activate(TInstant now) noexcept { + HttpServ_->Connections->Erase(this, now); + LastUsed = now; + ++ReceivedRequests; +} + +void TClientConnection::DeActivate() { + HttpServ_->Connections->Add(this); +} + +void TClientConnection::Reject() { + Reject_ = true; + + HttpServ_->Connections->Add(this); +} + +TClientRequest::TClientRequest() { +} + +TClientRequest::~TClientRequest() { +} + +bool TClientRequest::Reply(void* /*ThreadSpecificResource*/) { + if (strnicmp(RequestString.data(), "GET ", 4)) { + Output() << "HTTP/1.0 501 Not Implemented\r\n\r\n"; + } else { + Output() << "HTTP/1.0 200 OK\r\n" + "Content-Type: text/html\r\n" + "\r\n" + "Hello World!\r\n"; + } + + return true; +} + +bool TClientRequest::IsLocal() const { + return HasLocalAddress(Socket()); +} + +bool TClientRequest::CheckLoopback() { + bool isLocal = false; + + try { + isLocal = IsLocal(); + } catch (const yexception& e) { + Output() << "HTTP/1.0 500 Oops\r\n\r\n" + << e.what() << "\r\n"; + return false; + } + + if (!isLocal) { + Output() << "HTTP/1.0 403 Permission denied\r\n" + "Content-Type: text/html; charset=windows-1251\r\n" + "Connection: close\r\n" + "\r\n" + "<html><head><title>Permission denied</title></head>" + "<body><h1>Permission denied</h1>" + "<p>This request must be sent from the localhost.</p>" + "</body></html>\r\n"; + + return false; + } + + return true; +} + +void TClientRequest::ReleaseConnection() { + if (Conn_ && HttpConn_ && HttpServ()->Options().KeepAliveEnabled && HttpConn_->CanBeKeepAlive() && (!HttpServ()->Options().RejectExcessConnections || !HttpServ()->MaxRequestsReached())) { + Output().Finish(); + Conn_->DeActivate(); + Y_UNUSED(Conn_.Release()); + } +} + +void TClientRequest::ResetConnection() { + if (HttpConn_) { + // send RST packet 
to client + HttpConn_->Reset(); + HttpConn_.Destroy(); + } +} + +void TClientRequest::Process(void* ThreadSpecificResource) { + THolder<TClientRequest> this_(this); + + auto* serverImpl = Conn_->HttpServ_; + + try { + if (!HttpConn_) { + const size_t outputBufferSize = HttpServ()->Options().OutputBufferSize; + if (outputBufferSize) { + HttpConn_.Reset(new THttpServerConn(Socket(), outputBufferSize)); + } else { + HttpConn_.Reset(new THttpServerConn(Socket())); + } + + auto maxRequestsPerConnection = HttpServ()->Options().MaxRequestsPerConnection; + HttpConn_->Output()->EnableKeepAlive(HttpServ()->Options().KeepAliveEnabled && (!maxRequestsPerConnection || Conn_->ReceivedRequests < maxRequestsPerConnection)); + HttpConn_->Output()->EnableCompression(HttpServ()->Options().CompressionEnabled); + } + + if (ParsedHeaders.empty()) { + RequestString = Input().FirstLine(); + + const THttpHeaders& h = Input().Headers(); + ParsedHeaders.reserve(h.Count()); + for (THttpHeaders::TConstIterator it = h.Begin(); it != h.End(); ++it) { + ParsedHeaders.emplace_back(it->Name(), it->Value()); + } + } + + if (Reply(ThreadSpecificResource)) { + ReleaseConnection(); + + /* + * *this will be destroyed... + */ + + return; + } + } catch (...) { + serverImpl->Cb_->OnException(); + + throw; + } + + Y_UNUSED(this_.Release()); +} + +void TClientRequest::ProcessFailRequest(int failstate) { + Output() << "HTTP/1.1 503 Service Unavailable\r\n" + "Content-Type: text/plain\r\n" + "Content-Length: 21\r\n" + "\r\n" + "Service Unavailable\r\n"; + + TString url; + + if (!strnicmp(RequestString.data(), "GET ", 4)) { + // Trying to extract url... + const char* str = RequestString.data(); + + // Skipping spaces before url... + size_t start = 3; + while (str[start] == ' ') { + ++start; + } + + if (str[start]) { + // Traversing url... 
+ size_t idx = start; + + while (str[idx] != ' ' && str[idx]) { + ++idx; + } + + url = RequestString.substr(start, idx - start); + } + } + + const THttpServer::ICallBack::TFailLogData d = { + failstate, + url, + }; + + // Handling failure... + Conn_->HttpServ_->Cb_->OnFailRequestEx(d); + Output().Flush(); +} + +THttpServer* TClientRequest::HttpServ() const noexcept { + return Conn_->HttpServ_->Parent_; +} + +const TSocket& TClientRequest::Socket() const noexcept { + return Conn_->Socket_; +} + +NAddr::IRemoteAddrRef TClientRequest::GetListenerSockAddrRef() const noexcept { + return Conn_->ListenerSockAddrRef_; +} + +TInstant TClientRequest::AcceptMoment() const noexcept { + return Conn_->AcceptMoment; +} + +/* + * TRequestReplier + */ +TRequestReplier::TRequestReplier() { +} + +TRequestReplier::~TRequestReplier() { +} + +bool TRequestReplier::Reply(void* threadSpecificResource) { + const TReplyParams params = { + threadSpecificResource, Input(), Output()}; + + return DoReply(params); +} + +bool TryToBindAddresses(const THttpServerOptions& options, const std::function<void(TSocket)>* callbackOnBoundAddress) { + THttpServerOptions::TBindAddresses addrs; + try { + options.BindAddresses(addrs); + } catch (const std::exception&) { + return false; + } + + for (const auto& na : addrs) { + for (TNetworkAddress::TIterator ai = na.Begin(); ai != na.End(); ++ai) { + NAddr::TAddrInfo addr(&*ai); + + TSocket socket(::socket(addr.Addr()->sa_family, SOCK_STREAM, 0)); + + if (socket == INVALID_SOCKET) { + return false; + } + + FixIPv6ListenSocket(socket); + + if (options.ReuseAddress) { + int yes = 1; + ::setsockopt(socket, SOL_SOCKET, SO_REUSEADDR, (const char*)&yes, sizeof(yes)); + } + + if (options.ReusePort) { + SetReusePort(socket, true); + } + + if (::bind(socket, addr.Addr(), addr.Len()) == SOCKET_ERROR) { + return false; + } + + if (::listen(socket, options.ListenBacklog) == SOCKET_ERROR) { + return false; + } + + if (callbackOnBoundAddress != nullptr) { + 
(*callbackOnBoundAddress)(socket); + } + } + } + + return true; +} diff --git a/library/cpp/http/server/http.h b/library/cpp/http/server/http.h new file mode 100644 index 0000000000..b292d38f27 --- /dev/null +++ b/library/cpp/http/server/http.h @@ -0,0 +1,176 @@ +#pragma once + +#include "conn.h" +#include "options.h" + +#include <util/thread/pool.h> +#include <library/cpp/http/io/stream.h> +#include <util/memory/blob.h> +#include <util/generic/ptr.h> +#include <util/generic/vector.h> +#include <util/system/atomic.h> + +class IThreadFactory; +class TClientRequest; +class TClientConnection; + +class THttpServer { + friend class TClientRequest; + friend class TClientConnection; + +public: + class ICallBack { + public: + struct TFailLogData { + int failstate; + TString url; + }; + + virtual ~ICallBack() { + } + + virtual void OnFailRequest(int /*failstate*/) { + } + + virtual void OnFailRequestEx(const TFailLogData& d) { + OnFailRequest(d.failstate); + } + + virtual void OnException() { + } + + virtual void OnMaxConn() { + } + + virtual TClientRequest* CreateClient() = 0; + + virtual void OnListenStart() { + } + + virtual void OnListenStop() { + } + + virtual void OnWait() { + } + + virtual void* CreateThreadSpecificResource() { + return nullptr; + } + + virtual void DestroyThreadSpecificResource(void*) { + } + }; + + typedef THttpServerOptions TOptions; + typedef TSimpleSharedPtr<IThreadPool> TMtpQueueRef; + + THttpServer(ICallBack* cb, const TOptions& options = TOptions(), IThreadFactory* pool = nullptr); + THttpServer(ICallBack* cb, TMtpQueueRef mainWorkers, TMtpQueueRef failWorkers, const TOptions& options = TOptions()); + virtual ~THttpServer(); + + bool Start(); + + // shutdown a.s.a.p. 
+ void Stop(); + + // graceful shutdown with serving all already open connections + void Shutdown(); + + void Wait(); + int GetErrorCode(); + const char* GetError(); + void RestartRequestThreads(ui32 nTh, ui32 maxQS); + const TOptions& Options() const noexcept; + i64 GetClientCount() const; + + class TImpl; + size_t GetRequestQueueSize() const; + size_t GetFailQueueSize() const; + + const IThreadPool& GetRequestQueue() const; + const IThreadPool& GetFailQueue() const; + + static TAtomicBase AcceptReturnsInvalidSocketCounter(); + +private: + bool MaxRequestsReached() const; + +private: + THolder<TImpl> Impl_; +}; + +/** + * @deprecated Use TRequestReplier instead + */ +class TClientRequest: public IObjectInQueue { + friend class THttpServer::TImpl; + +public: + TClientRequest(); + ~TClientRequest() override; + + inline THttpInput& Input() noexcept { + return *HttpConn_->Input(); + } + + inline THttpOutput& Output() noexcept { + return *HttpConn_->Output(); + } + + THttpServer* HttpServ() const noexcept; + const TSocket& Socket() const noexcept; + NAddr::IRemoteAddrRef GetListenerSockAddrRef() const noexcept; + TInstant AcceptMoment() const noexcept; + + bool IsLocal() const; + bool CheckLoopback(); + void ProcessFailRequest(int failstate); + + void ReleaseConnection(); + + void ResetConnection(); + +private: + /* + * Processes the request after 'connection' been created and 'Headers' been read + * Returns 'false' if the processing must be continued by the next handler, + * 'true' otherwise ('this' will be deleted) + */ + virtual bool Reply(void* ThreadSpecificResource); + void Process(void* ThreadSpecificResource) override; + +public: + TVector<std::pair<TString, TString>> ParsedHeaders; + TString RequestString; + +private: + THolder<TClientConnection> Conn_; + THolder<THttpServerConn> HttpConn_; +}; + +class TRequestReplier: public TClientRequest { +public: + TRequestReplier(); + ~TRequestReplier() override; + + struct TReplyParams { + void* ThreadSpecificResource; 
+ THttpInput& Input; + THttpOutput& Output; + }; + + /* + * Processes the request after 'connection' been created and 'Headers' been read + * Returns 'false' if the processing must be continued by the next handler, + * 'true' otherwise ('this' will be deleted) + */ + virtual bool DoReply(const TReplyParams& params) = 0; + +private: + bool Reply(void* threadSpecificResource) final; + + using TClientRequest::Input; + using TClientRequest::Output; +}; + +bool TryToBindAddresses(const THttpServerOptions& options, const std::function<void(TSocket)>* callbackOnBoundAddress = nullptr); diff --git a/library/cpp/http/server/http_ex.cpp b/library/cpp/http/server/http_ex.cpp new file mode 100644 index 0000000000..e07db22bfc --- /dev/null +++ b/library/cpp/http/server/http_ex.cpp @@ -0,0 +1,107 @@ +#include "http_ex.h" + +#include <util/generic/buffer.h> +#include <util/generic/cast.h> +#include <util/stream/null.h> + +bool THttpClientRequestExtension::Parse(char* req, TBaseServerRequestData& rd) { + rd.SetSocket(Socket()); + + if (!rd.Parse(req)) { + Output() << "HTTP/1.1 403 Forbidden\r\n" + "Content-Type: text/plain\r\n" + "Content-Length: 39\r\n" + "\r\n" + "The server cannot be used as a proxy.\r\n"; + + return false; + } + + return true; +} + +bool THttpClientRequestExtension::ProcessHeaders(TBaseServerRequestData& rd, TBlob& postData) { + for (const auto& header : ParsedHeaders) { + rd.AddHeader(header.first, header.second); + } + + char* s = RequestString.begin(); + + enum EMethod { + NotImplemented, + Get, + Post, + Put, + Patch, + Delete, + }; + + enum EMethod foundMethod; + char* urlStart; + + if (strnicmp(s, "GET ", 4) == 0) { + foundMethod = Get; + urlStart = s + 4; + } else if (strnicmp(s, "POST ", 5) == 0) { + foundMethod = Post; + urlStart = s + 5; + } else if (strnicmp(s, "PUT ", 4) == 0) { + foundMethod = Put; + urlStart = s + 4; + } else if (strnicmp(s, "PATCH ", 6) == 0) { + foundMethod = Patch; + urlStart = s + 6; + } else if (strnicmp(s, "DELETE ", 7) == 
0) { + foundMethod = Delete; + urlStart = s + 7; + } else { + foundMethod = NotImplemented; + } + + switch (foundMethod) { + case Get: + case Delete: + if (!Parse(urlStart, rd)) { + return false; + } + break; + + case Post: + case Put: + case Patch: + try { + ui64 contentLength = 0; + if (Input().HasExpect100Continue()) { + Output().SendContinue(); + } + + if (!Input().ContentEncoded() && Input().GetContentLength(contentLength)) { + if (contentLength > HttpServ()->Options().MaxInputContentLength) { + Output() << "HTTP/1.1 413 Payload Too Large\r\nContent-Length:0\r\n\r\n"; + Output().Finish(); + return false; + } + + TBuffer buf(SafeIntegerCast<size_t>(contentLength)); + buf.Resize(Input().Load(buf.Data(), (size_t)contentLength)); + postData = TBlob::FromBuffer(buf); + } else { + postData = TBlob::FromStream(Input()); + } + } catch (...) { + Output() << "HTTP/1.1 400 Bad request\r\n\r\n"; + return false; + } + + if (!Parse(urlStart, rd)) { + return false; + } + break; + + case NotImplemented: + Output() << "HTTP/1.1 501 Not Implemented\r\n\r\n"; + return false; + } + + return true; +} diff --git a/library/cpp/http/server/http_ex.h b/library/cpp/http/server/http_ex.h new file mode 100644 index 0000000000..1ef43ea4fd --- /dev/null +++ b/library/cpp/http/server/http_ex.h @@ -0,0 +1,28 @@ +#pragma once + +#include "http.h" + +#include <library/cpp/http/misc/httpreqdata.h> + +class THttpClientRequestExtension: public TClientRequest { +public: + bool Parse(char* req, TBaseServerRequestData& rd); + bool ProcessHeaders(TBaseServerRequestData& rd, TBlob& postData); +}; + +template <class TRequestData> +class THttpClientRequestExtImpl: public THttpClientRequestExtension { +protected: + bool Parse(char* req) { + return THttpClientRequestExtension::Parse(req, RD); + } + bool ProcessHeaders() { + return THttpClientRequestExtension::ProcessHeaders(RD, Buf); + } + +protected: + TRequestData RD; + TBlob Buf; +}; + +using THttpClientRequestEx = 
THttpClientRequestExtImpl<TServerRequestData>; diff --git a/library/cpp/http/server/http_ut.cpp b/library/cpp/http/server/http_ut.cpp new file mode 100644 index 0000000000..cc62bb988e --- /dev/null +++ b/library/cpp/http/server/http_ut.cpp @@ -0,0 +1,739 @@ +#include "http.h" +#include "http_ex.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/generic/cast.h> +#include <util/stream/output.h> +#include <util/stream/zlib.h> +#include <util/system/datetime.h> +#include <util/system/sem.h> + +Y_UNIT_TEST_SUITE(THttpServerTest) { + class TEchoServer: public THttpServer::ICallBack { + class TRequest: public THttpClientRequestEx { + public: + inline TRequest(TEchoServer* parent) + : Parent_(parent) + { + } + + bool Reply(void* /*tsr*/) override { + if (!ProcessHeaders()) { + return true; + } + + Output() << "HTTP/1.1 200 Ok\r\n\r\n"; + if (Buf.Size()) { + Output().Write(Buf.AsCharPtr(), Buf.Size()); + } else { + Output() << Parent_->Res_; + } + Output().Finish(); + + return true; + } + + private: + TEchoServer* Parent_ = nullptr; + }; + + public: + inline TEchoServer(const TString& res) + : Res_(res) + { + } + + TClientRequest* CreateClient() override { + return new TRequest(this); + } + + private: + TString Res_; + }; + + class TSleepingServer: public THttpServer::ICallBack { + class TReplier: public TRequestReplier { + public: + inline TReplier(TSleepingServer* server) + : Server(server) + { + } + + bool DoReply(const TReplyParams& params) override { + Server->FreeThread(); + Server->Busy(1); + params.Output.Write("HTTP/1.0 201 Created\nX-Server: sleeping server\n\nZoooo"); + params.Output.Finish(); + Server->Replies->Inc(); + return true; + } + + private: + TSleepingServer* Server = nullptr; + }; + + public: + inline TSleepingServer(unsigned int size) + : Semaphore("conns", size) + , Semaphore2("threads", 1) + , Replies(new TAtomicCounter()) + , MaxConns(new TAtomicCounter()) + { + } + + 
void ResetCounters() { + Replies.Reset(new TAtomicCounter()); + MaxConns.Reset(new TAtomicCounter()); + } + + long RepliesCount() const { + return Replies->Val(); + } + + long MaxConnsCount() const { + return MaxConns->Val(); + } + + TClientRequest* CreateClient() override { + return new TReplier(this); + } + + void OnMaxConn() override { + MaxConns->Inc(); + } + + void OnFailRequest(int) override { + FreeThread(); + Busy(1); + } + + void Busy(int count) { + while (count-- > 0) { + Semaphore.Acquire(); + } + } + + void BusyThread() { + Semaphore2.Acquire(); + } + + void Free(int count) { + while (count-- > 0) { + Semaphore.Release(); + } + } + + void FreeThread() { + Semaphore2.Release(); + } + + private: + TSemaphore Semaphore; + TSemaphore Semaphore2; + THolder<TAtomicCounter> Replies; + THolder<TAtomicCounter> MaxConns; + }; + + static const TString CrLf = "\r\n"; + + struct TTestRequest { + TTestRequest(ui16 port, TString content = TString()) + : Port(port) + , Content(std::move(content)) + { + } + + void CheckContinue(TSocketInput& si) { + if (Expect100Continue) { + TStringStream ss; + TString firstLine; + si.ReadLine(firstLine); + for (;;) { + TString buf; + si.ReadLine(buf); + if (buf.size() == 0) { + break; + } + ss << buf << CrLf; + } + UNIT_ASSERT_EQUAL(firstLine, "HTTP/1.1 100 Continue"); + } + } + + TString Execute() { + TSocket* s = nullptr; + THolder<TSocket> singleReqSocket; + if (KeepAliveConnection) { + if (!KeepAlivedSocket) { + KeepAlivedSocket = MakeHolder<TSocket>(TNetworkAddress("localhost", Port), TDuration::Seconds(10)); + } + s = KeepAlivedSocket.Get(); + } else { + TNetworkAddress addr("localhost", Port); + singleReqSocket.Reset(new TSocket(addr, TDuration::Seconds(10))); + s = singleReqSocket.Get(); + } + bool isPost = Type == "POST"; + TSocketInput si(*s); + + if (UseHttpOutput) { + TSocketOutput so(*s); + THttpOutput output(&so); + + output.EnableKeepAlive(KeepAliveConnection); + output.EnableCompression(EnableResponseEncoding); + + 
TStringStream r; + r << Type << " / HTTP/1.1" << CrLf; + r << "Host: localhost:" + ToString(Port) << CrLf; + if (isPost) { + if (ContentEncoding.size()) { + r << "Content-Encoding: " << ContentEncoding << CrLf; + } else { + r << "Transfer-Encoding: chunked" << CrLf; + } + if (Expect100Continue) { + r << "Expect: 100-continue" << CrLf; + } + } + + r << CrLf; + if (isPost) { + output.Write(r.Str()); + output.Flush(); + CheckContinue(si); + output.Write(Content); + output.Finish(); + } else { + output.Write(r.Str()); + output.Finish(); + } + } else { + TStringStream r; + r << Type << " / HTTP/1.1" << CrLf; + r << "Host: localhost:" + ToString(Port) << CrLf; + if (KeepAliveConnection) { + r << "Connection: Keep-Alive" << CrLf; + } else { + r << "Connection: Close" << CrLf; + } + if (EnableResponseEncoding) { + r << "Accept-Encoding: gzip, deflate, x-gzip, x-deflate, y-lzo, y-lzf, y-lzq, y-bzip2, y-lzma" << CrLf; + } + if (isPost && Expect100Continue) { + r << "Expect: 100-continue" << CrLf; + } + if (isPost && ContentEncoding.size() && Content.size()) { + r << "Content-Encoding: " << ContentEncoding << CrLf; + TStringStream compressedContent; + { + TZLibCompress zlib(&compressedContent); + zlib.Write(Content.data(), Content.size()); + zlib.Flush(); + zlib.Finish(); + } + r << "Content-Length: " << compressedContent.Size() << CrLf; + r << CrLf; + s->Send(r.Data(), r.Size()); + CheckContinue(si); + Hdr = r.Str(); + TString tosend = compressedContent.Str(); + s->Send(tosend.data(), tosend.size()); + } else { + if (isPost) { + r << "Content-Length: " << Content.size() << CrLf; + r << CrLf; + s->Send(r.Data(), r.Size()); + CheckContinue(si); + Hdr = r.Str(); + s->Send(Content.data(), Content.size()); + } else { + r << CrLf; + Hdr = r.Str(); + s->Send(r.Data(), r.Size()); + } + } + } + + THttpInput input(&si); + TStringStream ss; + TransferData(&input, &ss); + + return ss.Str(); + } + + TString GetDescription() const { + if (UseHttpOutput) { + TStringStream ss; + ss << 
(KeepAliveConnection ? "keep-alive " : "") << Type; + if (ContentEncoding.size()) { + ss << " with encoding=" << ContentEncoding; + } + return ss.Str(); + } else { + return Hdr; + } + } + + ui16 Port = 0; + bool UseHttpOutput = true; + TString Type = "GET"; + TString ContentEncoding; + TString Content; + bool KeepAliveConnection = false; + THolder<TSocket> KeepAlivedSocket; + bool EnableResponseEncoding = false; + TString Hdr; + bool Expect100Continue = false; + }; + + class TFailingMtpQueue: public TSimpleThreadPool { + private: + bool FailOnAdd_ = false; + + public: + void SetFailOnAdd(bool fail = true) { + FailOnAdd_ = fail; + } + [[nodiscard]] bool Add(IObjectInQueue* pObj) override { + if (FailOnAdd_) { + return false; + } + + return TSimpleThreadPool::Add(pObj); + } + TFailingMtpQueue() = default; + TFailingMtpQueue(IThreadFactory* pool) + : TSimpleThreadPool(pool) + { + } + }; + + TString TestData(size_t size = 5 * 4096) { + TString res; + + for (size_t i = 0; i < size; ++i) { + res += (char)i; + } + return res; + } + + Y_UNIT_TEST(TestEchoServer) { + TString res = TestData(); + TPortManager pm; + const ui16 port = pm.GetPort(); + const bool trueFalse[] = {true, false}; + + TEchoServer serverImpl(res); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).EnableCompression(true)); + + for (int i = 0; i < 2; ++i) { + UNIT_ASSERT(server.Start()); + + TTestRequest r(port); + r.Content = res; + + for (bool keepAlive : trueFalse) { + r.KeepAliveConnection = keepAlive; + + // THttpOutput use chunked stream, else use Content-Length + for (bool useHttpOutput : trueFalse) { + r.UseHttpOutput = useHttpOutput; + + for (bool enableResponseEncoding : trueFalse) { + r.EnableResponseEncoding = enableResponseEncoding; + + const TString reqTypes[] = {"GET", "POST"}; + for (const TString& reqType : reqTypes) { + r.Type = reqType; + + const TString encoders[] = {"", "deflate"}; + for (const TString& encoder : encoders) { + r.ContentEncoding = 
encoder; + + for (bool expect100Continue : trueFalse) { + r.Expect100Continue = expect100Continue; + TString resp = r.Execute(); + UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + r.GetDescription()); + } + } + } + } + } + } + + server.Stop(); + } + } + + Y_UNIT_TEST(TestReusePortEnabled) { + if (!IsReusePortAvailable()) { + return; // skip test + } + TString res = TestData(); + TPortManager pm; + const ui16 port = pm.GetPort(); + + TEchoServer serverImpl(res); + TVector<THolder<THttpServer>> servers; + for (ui32 i = 0; i < 10; i++) { + servers.push_back(MakeHolder<THttpServer>(&serverImpl, THttpServer::TOptions(port).EnableReusePort(true))); + } + + for (ui32 testRun = 0; testRun < 3; testRun++) { + for (auto& server : servers) { + // start servers one at a time and check at least one of them is replying + UNIT_ASSERT(server->Start()); + + TTestRequest r(port, res); + UNIT_ASSERT_C(r.Execute() == res, "diff echo response for request:\n" + r.GetDescription()); + } + + for (auto& server : servers) { + // ping servers and stop them one at a time + // at the last iteration only one server is still working and then gets stopped as well + + TTestRequest r(port, res); + UNIT_ASSERT_C(r.Execute() == res, "diff echo response for request:\n" + r.GetDescription()); + + server->Stop(); + } + } + } + + Y_UNIT_TEST(TestReusePortDisabled) { + // check that with the ReusePort option disabled it's impossible to start two servers on the same port + // check that ReusePort option is disabled by default (don't set it explicitly in the test) + TPortManager pm; + const ui16 port = pm.GetPort(); + + TEchoServer serverImpl(TString{}); + THttpServer server1(&serverImpl, THttpServer::TOptions(port)); + THttpServer server2(&serverImpl, THttpServer::TOptions(port)); + + UNIT_ASSERT(true == server1.Start()); + UNIT_ASSERT(false == server2.Start()); + + server1.Stop(); + // Stop() is a sync call, port should be free by now + UNIT_ASSERT(true == server2.Start()); + 
UNIT_ASSERT(false == server1.Start()); + } + + Y_UNIT_TEST(TestFailServer) { + /** + * Emulate request processing failures + * Data should be large enough not to fit into socket buffer + **/ + TString res = TestData(10 * 1024 * 1024); + TPortManager portManager; + const ui16 port = portManager.GetPort(); + TEchoServer serverImpl(res); + THttpServer::TOptions options(port); + options.EnableKeepAlive(true); + options.EnableCompression(true); + using TFailingServerMtpQueue = TThreadPoolBinder<TFailingMtpQueue, THttpServer::ICallBack>; + THttpServer::TMtpQueueRef mainWorkers = new TFailingServerMtpQueue(&serverImpl, SystemThreadFactory()); + THttpServer::TMtpQueueRef failWorkers = new TThreadPool(SystemThreadFactory()); + THttpServer server(&serverImpl, mainWorkers, failWorkers, options); + + UNIT_ASSERT(server.Start()); + for (size_t i = 0; i < 3; ++i) { + // should fail on 2nd request + static_cast<TFailingMtpQueue*>(mainWorkers.Get())->SetFailOnAdd(i == 1); + TTestRequest r(port); + r.Content = res; + r.Type = "POST"; + TString resp = r.Execute(); + if (i == 1) { + UNIT_ASSERT(resp.Contains("Service Unavailable")); + } else { + UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + r.GetDescription()); + } + } + server.Stop(); + } + + class TReleaseConnectionServer: public THttpServer::ICallBack { + class TRequest: public THttpClientRequestEx { + public: + bool Reply(void* /*tsr*/) override { + Output() << "HTTP/1.1 200 Ok\r\n\r\n"; + Output() << "reply"; + Output().Finish(); + + ReleaseConnection(); + + throw yexception() << "some error"; + + return true; + } + }; + + public: + TClientRequest* CreateClient() override { + return new TRequest(); + } + + void OnException() override { + ExceptionMessage = CurrentExceptionMessage(); + } + + TString ExceptionMessage; + }; + + class TResetConnectionServer: public THttpServer::ICallBack { + class TRequest: public TClientRequest { + public: + bool Reply(void* /*tsr*/) override { + Output() << "HTTP/1.1"; + 
ResetConnection(); + + return true; + } + }; + + public: + TClientRequest* CreateClient() override { + return new TRequest(); + } + + void OnException() override { + ExceptionMessage = CurrentExceptionMessage(); + } + + TString ExceptionMessage; + }; + + class TListenerSockAddrReplyServer: public THttpServer::ICallBack { + class TRequest: public TClientRequest { + public: + bool Reply(void* /*tsr*/) override { + Output() << "HTTP/1.1 200 Ok\r\n\r\n"; + Output() << PrintHostAndPort(*GetListenerSockAddrRef()); + + Output().Finish(); + + return true; + } + }; + + public: + TClientRequest* CreateClient() override { + return new TRequest(); + } + }; + + Y_UNIT_TEST(TTestResetConnection) { + TPortManager pm; + const ui16 port = pm.GetPort(); + + TResetConnectionServer serverImpl; + THttpServer server(&serverImpl, THttpServer::TOptions(port)); + UNIT_ASSERT(server.Start()); + + TTestRequest r(port, "request"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(r.Execute(), TSystemError, "Connection reset by peer"); + + server.Stop(); + }; + + Y_UNIT_TEST(TTestReleaseConnection) { + TPortManager pm; + const ui16 port = pm.GetPort(); + + TReleaseConnectionServer serverImpl; + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true)); + UNIT_ASSERT(server.Start()); + + TTestRequest r(port, "request"); + r.KeepAliveConnection = true; + + UNIT_ASSERT_C(r.Execute() == "reply", "diff echo response for request:\n" + r.GetDescription()); + + server.Stop(); + + UNIT_ASSERT_STRINGS_EQUAL(serverImpl.ExceptionMessage, "(yexception) some error"); + }; + + THttpInput SendRequest(TSocket& socket, ui16 port) { + TSocketInput si(socket); + TSocketOutput so(socket); + THttpOutput out(&so); + out.EnableKeepAlive(true); + out << "GET / HTTP/1.1" << CrLf; + out << "Host: localhost:" + ToString(port) << CrLf; + out << CrLf; + out.Flush(); + + THttpInput input(&si); + input.ReadAll(); + return input; + } + + THttpInput SendRequestWithBody(TSocket& socket, ui16 port, TString body) { + 
TSocketInput si(socket); + TSocketOutput so(socket); + THttpOutput out(&so); + out << "POST / HTTP/1.1" << CrLf; + out << "Host: localhost:" + ToString(port) << CrLf; + out << "Content-Length: " + ToString(body.size()) << CrLf; + out << CrLf; + out << body; + out.Flush(); + + THttpInput input(&si); + input.ReadAll(); + return input; + } + + Y_UNIT_TEST(TTestExpirationTimeout) { + TPortManager pm; + const ui16 port = pm.GetPort(); + + TEchoServer serverImpl("test_data"); + THttpServer::TOptions options(port); + options.nThreads = 1; + options.MaxQueueSize = 0; + options.MaxConnections = 0; + options.KeepAliveEnabled = true; + options.ExpirationTimeout = TDuration::Seconds(1); + options.PollTimeout = TDuration::MilliSeconds(100); + THttpServer server(&serverImpl, options); + UNIT_ASSERT(server.Start()); + + TSocket socket(TNetworkAddress("localhost", port), TDuration::Seconds(10)); + + SendRequest(socket, port); + SendRequest(socket, port); + + Sleep(TDuration::Seconds(5)); + UNIT_ASSERT_EXCEPTION(SendRequest(socket, port), THttpReadException); + + server.Stop(); + } + + Y_UNIT_TEST(TTestContentLengthTooLarge) { + TPortManager pm; + const ui16 port = pm.GetPort(); + + TEchoServer serverImpl("test_data"); + THttpServer::TOptions options(port); + options.nThreads = 1; + options.MaxQueueSize = 0; + options.MaxInputContentLength = 2_KB; + options.MaxConnections = 0; + options.KeepAliveEnabled = false; + options.ExpirationTimeout = TDuration::Seconds(1); + options.PollTimeout = TDuration::MilliSeconds(100); + THttpServer server(&serverImpl, options); + UNIT_ASSERT(server.Start()); + + TSocket socket(TNetworkAddress("localhost", port), TDuration::Seconds(5)); + UNIT_ASSERT_STRING_CONTAINS(SendRequestWithBody(socket, port, TString(1_KB, 'a')).FirstLine(), "HTTP/1.1 200 Ok"); + + TSocket socket2(TNetworkAddress("localhost", port), TDuration::Seconds(5)); + UNIT_ASSERT_STRING_CONTAINS(SendRequestWithBody(socket2, port, TString(10_KB, 'a')).FirstLine(), "HTTP/1.1 413 Payload 
Too Large"); + + server.Stop(); + } + + + Y_UNIT_TEST(TTestCloseConnectionOnRequestLimit) { + TPortManager pm; + const ui16 port = pm.GetPort(); + + TEchoServer serverImpl("test_data"); + THttpServer server(&serverImpl, THttpServer::TOptions(port).EnableKeepAlive(true).SetMaxRequestsPerConnection(2)); + UNIT_ASSERT(server.Start()); + + TSocket socket(TNetworkAddress("localhost", port), TDuration::Seconds(10)); + + UNIT_ASSERT(SendRequest(socket, port).IsKeepAlive()); + UNIT_ASSERT(!SendRequest(socket, port).IsKeepAlive()); + + UNIT_ASSERT_EXCEPTION(SendRequest(socket, port), THttpReadException); + + server.Stop(); + } + + Y_UNIT_TEST(TTestListenerSockAddrConnection) { + TPortManager pm; + const ui16 port1 = pm.GetPort(); + const ui16 port2 = pm.GetPort(); + + TListenerSockAddrReplyServer serverImpl; + THttpServer server(&serverImpl, THttpServer::TOptions().EnableKeepAlive(true).AddBindAddress("127.0.0.1", port1).AddBindAddress("127.0.0.1", port2)); + UNIT_ASSERT(server.Start()); + + TTestRequest r1(port1); + r1.KeepAliveConnection = true; + + TString resp = r1.Execute(); + UNIT_ASSERT(resp == TString::Join("127.0.0.1", ":", ToString(port1))); + + TTestRequest r2(port2); + r2.KeepAliveConnection = true; + + resp = r2.Execute(); + UNIT_ASSERT(resp == TString::Join("127.0.0.1", ":", ToString(port2))); + + server.Stop(); + }; + +#if 0 + Y_UNIT_TEST(TestSocketsLeak) { + const bool trueFalse[] = {true, false}; + TPortManager portManager; + const ui16 port = portManager.GetPort(); + TString res = TestData(25); + TSleepingServer server(3); + THttpServer::TOptions options(port); + options.MaxConnections = 1; + options.MaxQueueSize = 1; + options.MaxFQueueSize = 2; + options.nFThreads = 2; + options.KeepAliveEnabled = true; + options.RejectExcessConnections = true; + THttpServer srv(&server, options); + UNIT_ASSERT(srv.Start()); + + for (bool keepAlive : trueFalse) { + server.ResetCounters(); + TVector<TAutoPtr<IThreadFactory::IThread>> threads; + + server.Busy(3); + 
server.BusyThread(); + + for (size_t i = 0; i < 3; ++i) { + auto func = [&server, port, keepAlive]() { + server.BusyThread(); + THolder<TTestRequest> r = MakeHolder<TTestRequest>(port); + r->KeepAliveConnection = keepAlive; + r->Execute(); + }; + threads.push_back(SystemThreadFactory()->Run(func)); + } + + server.FreeThread(); // all threads get connection & go to processing + Sleep(TDuration::MilliSeconds(100)); + server.BusyThread(); // we wait while connections are established by the + // system and accepted by the server + server.Free(3); // we release all connections processing + + for (auto&& thread : threads) { + thread->Join(); + } + + server.Free(3); + server.FreeThread(); + + UNIT_ASSERT_EQUAL_C(server.MaxConnsCount(), 2, "we should get MaxConn notification 2 times, got " + ToString(server.MaxConnsCount())); + UNIT_ASSERT_EQUAL_C(server.RepliesCount(), 1, "only one request should have been processed, got " + ToString(server.RepliesCount())); + } + } +#endif +} diff --git a/library/cpp/http/server/options.cpp b/library/cpp/http/server/options.cpp new file mode 100644 index 0000000000..05c954384a --- /dev/null +++ b/library/cpp/http/server/options.cpp @@ -0,0 +1,43 @@ +#include "options.h" + +#include <util/string/cast.h> +#include <util/digest/numeric.h> +#include <util/network/ip.h> +#include <util/network/socket.h> +#include <util/generic/hash_set.h> +#include <util/generic/yexception.h> + +using TAddr = THttpServerOptions::TAddr; + +static inline TString AddrToString(const TAddr& addr) { + return addr.Addr + ":" + ToString(addr.Port); +} + +static inline TNetworkAddress ToNetworkAddr(const TString& address, ui16 port) { + if (address.empty() || address == TStringBuf("*")) { + return TNetworkAddress(port); + } + + return TNetworkAddress(address, port); +} + +void THttpServerOptions::BindAddresses(TBindAddresses& ret) const { + THashSet<TString> check; + + for (auto addr : BindSockaddr) { + if (!addr.Port) { + addr.Port = Port; + } + + const TString 
straddr = AddrToString(addr); + + if (check.find(straddr) == check.end()) { + check.insert(straddr); + ret.push_back(ToNetworkAddr(addr.Addr, addr.Port)); + } + } + + if (ret.empty()) { + ret.push_back(Host ? TNetworkAddress(Host, Port) : TNetworkAddress(Port)); + } +} diff --git a/library/cpp/http/server/options.h b/library/cpp/http/server/options.h new file mode 100644 index 0000000000..38eda0e5e7 --- /dev/null +++ b/library/cpp/http/server/options.h @@ -0,0 +1,176 @@ +#pragma once + +#include <util/network/ip.h> +#include <util/network/init.h> +#include <util/network/address.h> +#include <util/generic/size_literals.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/datetime/base.h> + +class THttpServerOptions { +public: + inline THttpServerOptions(ui16 port = 17000) noexcept + : Port(port) + { + } + + using TBindAddresses = TVector<TNetworkAddress>; + void BindAddresses(TBindAddresses& ret) const; + + inline THttpServerOptions& AddBindAddress(const TString& address, ui16 port) { + const TAddr addr = { + address, + port, + }; + + BindSockaddr.push_back(addr); + return *this; + } + + inline THttpServerOptions& AddBindAddress(const TString& address) { + return AddBindAddress(address, 0); + } + + inline THttpServerOptions& EnableKeepAlive(bool enable) noexcept { + KeepAliveEnabled = enable; + + return *this; + } + + inline THttpServerOptions& EnableCompression(bool enable) noexcept { + CompressionEnabled = enable; + + return *this; + } + + inline THttpServerOptions& EnableRejectExcessConnections(bool enable) noexcept { + RejectExcessConnections = enable; + + return *this; + } + + inline THttpServerOptions& EnableReusePort(bool enable) noexcept { + ReusePort = enable; + + return *this; + } + + inline THttpServerOptions& EnableReuseAddress(bool enable) noexcept { + ReuseAddress = enable; + + return *this; + } + + inline THttpServerOptions& SetThreads(ui32 threads) noexcept { + nThreads = threads; + + return *this; + } + + /// 
Default interface name to bind the server. Used when none of BindAddress are provided. + inline THttpServerOptions& SetHost(const TString& host) noexcept { + Host = host; + + return *this; + } + + /// Default port to bind the server. Used when none of BindAddress are provided. + inline THttpServerOptions& SetPort(ui16 port) noexcept { + Port = port; + + return *this; + } + + inline THttpServerOptions& SetMaxConnections(ui32 mc = 0) noexcept { + MaxConnections = mc; + + return *this; + } + + inline THttpServerOptions& SetMaxQueueSize(ui32 mqs = 0) noexcept { + MaxQueueSize = mqs; + + return *this; + } + + inline THttpServerOptions& SetClientTimeout(const TDuration& timeout) noexcept { + ClientTimeout = timeout; + + return *this; + } + + inline THttpServerOptions& SetListenBacklog(int val) noexcept { + ListenBacklog = val; + + return *this; + } + + inline THttpServerOptions& SetOutputBufferSize(size_t val) noexcept { + OutputBufferSize = val; + + return *this; + } + + inline THttpServerOptions& SetMaxInputContentLength(ui64 val) noexcept { + MaxInputContentLength = val; + + return *this; + } + + inline THttpServerOptions& SetMaxRequestsPerConnection(size_t val) noexcept { + MaxRequestsPerConnection = val; + + return *this; + } + + /// Use TElasticQueue instead of TThreadPool for request queues + inline THttpServerOptions& EnableElasticQueues(bool enable) noexcept { + UseElasticQueues = enable; + + return *this; + } + + inline THttpServerOptions& SetThreadsName(const TString& listenThreadName, const TString& requestsThreadName, const TString& failRequestsThreadName) noexcept { + ListenThreadName = listenThreadName; + RequestsThreadName = requestsThreadName; + FailRequestsThreadName = failRequestsThreadName; + + return *this; + } + + struct TAddr { + TString Addr; + ui16 Port; + }; + + typedef TVector<TAddr> TAddrs; + + bool KeepAliveEnabled = true; + bool CompressionEnabled = false; + bool RejectExcessConnections = false; + bool ReusePort = false; // set SO_REUSEPORT 
socket option + bool ReuseAddress = true; // set SO_REUSEADDR socket option + TAddrs BindSockaddr; + ui16 Port = 17000; // The port on which to run the web server + TString Host; // DNS entry + const char* ServerName = "YWS/1.0"; // The Web server name to return in HTTP headers + ui32 nThreads = 0; // Thread count for requests processing + ui32 MaxQueueSize = 0; // Max allowed request count in queue + ui32 nFThreads = 1; + ui32 MaxFQueueSize = 0; + ui32 MaxConnections = 100; + int ListenBacklog = SOMAXCONN; + TDuration ClientTimeout; + size_t OutputBufferSize = 0; + ui64 MaxInputContentLength = sizeof(size_t) <= 4 ? 2_GB : 64_GB; + size_t MaxRequestsPerConnection = 0; // If keep-alive is enabled, request limit before connection is closed + bool UseElasticQueues = false; + + TDuration PollTimeout; // timeout of TSocketPoller::WaitT call + TDuration ExpirationTimeout; // drop inactive connections after ExpirationTimeout (should be > 0) + + TString ListenThreadName = "HttpListen"; + TString RequestsThreadName = "HttpServer"; + TString FailRequestsThreadName = "HttpServer"; +}; diff --git a/library/cpp/http/server/response.cpp b/library/cpp/http/server/response.cpp new file mode 100644 index 0000000000..52d64c91ce --- /dev/null +++ b/library/cpp/http/server/response.cpp @@ -0,0 +1,65 @@ +#include "response.h" + +#include <util/stream/output.h> +#include <util/stream/mem.h> +#include <util/string/cast.h> + +THttpResponse& THttpResponse::AddMultipleHeaders(const THttpHeaders& headers) { + for (THttpHeaders::TConstIterator i = headers.Begin(); i != headers.End(); ++i) { + this->Headers.AddHeader(*i); + } + return *this; +} + +THttpResponse& THttpResponse::SetContentType(const TStringBuf& contentType) { + Headers.AddOrReplaceHeader(THttpInputHeader("Content-Type", ToString(contentType))); + + return *this; +} + +void THttpResponse::OutTo(IOutputStream& os) const { + TVector<IOutputStream::TPart> parts; + const size_t FIRST_LINE_PARTS = 3; + const size_t HEADERS_PARTS = 
Headers.Count() * 4; + const size_t CONTENT_PARTS = 5; + parts.reserve(FIRST_LINE_PARTS + HEADERS_PARTS + CONTENT_PARTS); + + // first line + parts.push_back(IOutputStream::TPart(TStringBuf("HTTP/1.1 "))); + parts.push_back(IOutputStream::TPart(HttpCodeStrEx(Code))); + parts.push_back(IOutputStream::TPart::CrLf()); + + // headers + for (THttpHeaders::TConstIterator i = Headers.Begin(); i != Headers.End(); ++i) { + parts.push_back(IOutputStream::TPart(i->Name())); + parts.push_back(IOutputStream::TPart(TStringBuf(": "))); + parts.push_back(IOutputStream::TPart(i->Value())); + parts.push_back(IOutputStream::TPart::CrLf()); + } + + char buf[50]; + + if (!Content.empty()) { + TMemoryOutput mo(buf, sizeof(buf)); + + mo << Content.size(); + + parts.push_back(IOutputStream::TPart(TStringBuf("Content-Length: "))); + parts.push_back(IOutputStream::TPart(buf, mo.Buf() - buf)); + parts.push_back(IOutputStream::TPart::CrLf()); + } + + // content + parts.push_back(IOutputStream::TPart::CrLf()); + + if (!Content.empty()) { + parts.push_back(IOutputStream::TPart(Content)); + } + + os.Write(parts.data(), parts.size()); +} + +template <> +void Out<THttpResponse>(IOutputStream& os, const THttpResponse& resp) { + resp.OutTo(os); +} diff --git a/library/cpp/http/server/response.h b/library/cpp/http/server/response.h new file mode 100644 index 0000000000..a75cb85605 --- /dev/null +++ b/library/cpp/http/server/response.h @@ -0,0 +1,82 @@ +#pragma once + +#include <library/cpp/http/misc/httpcodes.h> +#include <library/cpp/http/io/stream.h> + +#include <util/generic/strbuf.h> +#include <util/string/cast.h> + +class THttpHeaders; +class IOutputStream; + +class THttpResponse { +public: + THttpResponse() noexcept + : Code(HTTP_OK) + { + } + + explicit THttpResponse(HttpCodes code) noexcept + : Code(code) + { + } + + template <typename ValueType> + THttpResponse& AddHeader(const TString& name, const ValueType& value) { + return AddHeader(THttpInputHeader(name, ToString(value))); + } + + 
THttpResponse& AddHeader(const THttpInputHeader& header) { + Headers.AddHeader(header); + + return *this; + } + + THttpResponse& AddMultipleHeaders(const THttpHeaders& headers); + + const THttpHeaders& GetHeaders() const { + return Headers; + } + + THttpResponse& SetContentType(const TStringBuf& contentType); + + /** + * @note If @arg content isn't empty its size is automatically added as a + * "Content-Length" header during output to IOutputStream. + * @see IOutputStream& operator << (IOutputStream&, const THttpResponse&) + */ + THttpResponse& SetContent(const TString& content) { + Content = content; + + return *this; + } + + TString GetContent() const { + return Content; + } + + /** + * @note If @arg content isn't empty its size is automatically added as a + * "Content-Length" header during output to IOutputStream. + * @see IOutputStream& operator << (IOutputStream&, const THttpResponse&) + */ + THttpResponse& SetContent(const TString& content, const TStringBuf& contentType) { + return SetContent(content).SetContentType(contentType); + } + + HttpCodes HttpCode() const { + return Code; + } + + THttpResponse& SetHttpCode(HttpCodes code) { + Code = code; + return *this; + } + + void OutTo(IOutputStream& out) const; + +private: + HttpCodes Code; + THttpHeaders Headers; + TString Content; +}; diff --git a/library/cpp/http/server/response_ut.cpp b/library/cpp/http/server/response_ut.cpp new file mode 100644 index 0000000000..73e2112ad3 --- /dev/null +++ b/library/cpp/http/server/response_ut.cpp @@ -0,0 +1,142 @@ +#include "response.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/cast.h> + +Y_UNIT_TEST_SUITE(TestHttpResponse) { + Y_UNIT_TEST(TestCodeOnly) { + UNIT_ASSERT_STRINGS_EQUAL(ToString(THttpResponse()), "HTTP/1.1 200 Ok\r\n\r\n"); + UNIT_ASSERT_STRINGS_EQUAL(ToString(THttpResponse(HTTP_NOT_FOUND)), "HTTP/1.1 404 Not found\r\n\r\n"); + } + + Y_UNIT_TEST(TestRedirect) { + THttpResponse resp = 
THttpResponse(HTTP_FOUND).AddHeader("Location", "yandex.ru"); + UNIT_ASSERT_STRINGS_EQUAL(ToString(resp), "HTTP/1.1 302 Moved temporarily\r\n" + "Location: yandex.ru\r\n" + "\r\n"); + } + + Y_UNIT_TEST(TestAddHeader) { + THttpResponse resp(HTTP_FORBIDDEN); + resp.AddHeader(THttpInputHeader("X-Header-1", "ValueOne")); + resp.AddHeader("X-Header-2", 10); + resp.AddHeader("X-Header-3", true); + + const char* EXPECTED = "HTTP/1.1 403 Forbidden\r\n" + "X-Header-1: ValueOne\r\n" + "X-Header-2: 10\r\n" + "X-Header-3: 1\r\n" + "\r\n"; + UNIT_ASSERT_STRINGS_EQUAL(ToString(resp), EXPECTED); + } + + Y_UNIT_TEST(TestAddMultipleHeaders) { + THttpHeaders headers; + headers.AddHeader(THttpInputHeader("X-Header-1", "ValueOne")); + headers.AddHeader(THttpInputHeader("X-Header-2", "ValueTwo")); + headers.AddHeader(THttpInputHeader("X-Header-3", "ValueThree")); + + const char* EXPECTED = "HTTP/1.1 403 Forbidden\r\n" + "X-Header-1: ValueOne\r\n" + "X-Header-2: ValueTwo\r\n" + "X-Header-3: ValueThree\r\n" + "\r\n"; + UNIT_ASSERT_STRINGS_EQUAL(ToString(THttpResponse(HTTP_FORBIDDEN).AddMultipleHeaders(headers)), + EXPECTED); + } + + Y_UNIT_TEST(TestGetHeaders) { + THttpResponse resp(HTTP_FORBIDDEN); + + THttpHeaders headers; + headers.AddHeader(THttpInputHeader("X-Header-1", "ValueOne")); + headers.AddHeader(THttpInputHeader("X-Header-2", "ValueTwo")); + headers.AddHeader(THttpInputHeader("X-Header-3", "ValueThree")); + resp.AddMultipleHeaders(headers); + resp.AddHeader("X-Header-4", "ValueFour"); + + const THttpHeaders& gotHeaders = resp.GetHeaders(); + UNIT_ASSERT_VALUES_EQUAL(gotHeaders.Count(), 4); + UNIT_ASSERT(gotHeaders.HasHeader("X-Header-1")); + UNIT_ASSERT_STRINGS_EQUAL(gotHeaders.FindHeader("X-Header-1")->Value(), "ValueOne"); + UNIT_ASSERT(gotHeaders.HasHeader("X-Header-4")); + UNIT_ASSERT_STRINGS_EQUAL(gotHeaders.FindHeader("X-Header-4")->Value(), "ValueFour"); + } + + + Y_UNIT_TEST(TestSetContent) { + const char* EXPECTED = "HTTP/1.1 200 Ok\r\n" + "Content-Length: 10\r\n" + 
"\r\n" + "0123456789"; + UNIT_ASSERT_STRINGS_EQUAL(ToString(THttpResponse().SetContent("0123456789")), + EXPECTED); + } + + Y_UNIT_TEST(TestSetContentWithContentType) { + const char* EXPECTED = "HTTP/1.1 200 Ok\r\n" + "Content-Type: text/xml\r\n" + "Content-Length: 28\r\n" + "\r\n" + "<xml><tag value=\"1\" /></xml>"; + THttpResponse resp; + resp.SetContent("<xml><tag value=\"1\" /></xml>").SetContentType("text/xml"); + UNIT_ASSERT_STRINGS_EQUAL(ToString(resp), EXPECTED); + } + + Y_UNIT_TEST(TestCopyConstructor) { + THttpResponse resp(HTTP_FORBIDDEN); + resp.AddHeader(THttpInputHeader("X-Header-1", "ValueOne")) + .AddHeader("X-Header-2", "ValueTwo") + .AddHeader(THttpInputHeader("X-Header-3", "ValueThree")) + .SetContent("Some stuff") + .SetContentType("text/plain"); + + THttpResponse copy = resp; + UNIT_ASSERT_STRINGS_EQUAL(ToString(copy), ToString(resp)); + } + + Y_UNIT_TEST(TestAssignment) { + THttpResponse resp(HTTP_FORBIDDEN); + resp.AddHeader(THttpInputHeader("X-Header-1", "ValueOne")); + resp.AddHeader(THttpInputHeader("X-Header-2", "ValueTwo")); + resp.AddHeader(THttpInputHeader("X-Header-3", "ValueThree")); + resp.SetContent("Some stuff").SetContentType("text/plain"); + + THttpResponse copy; + copy = resp; + UNIT_ASSERT_STRINGS_EQUAL(ToString(copy), ToString(resp)); + } + + Y_UNIT_TEST(TestEmptyContent) { + UNIT_ASSERT_STRINGS_EQUAL(ToString(THttpResponse().SetContent("")), "HTTP/1.1 200 Ok\r\n\r\n"); + } + + Y_UNIT_TEST(TestReturnReference) { + THttpResponse resp; + UNIT_ASSERT_EQUAL(&resp, &resp.AddHeader("Header1", 1)); + UNIT_ASSERT_EQUAL(&resp, &resp.AddHeader(THttpInputHeader("Header2", "2"))); + + THttpHeaders headers; + headers.AddHeader(THttpInputHeader("Header3", "3")); + headers.AddHeader(THttpInputHeader("Header4", "4")); + UNIT_ASSERT_EQUAL(&resp, &resp.AddMultipleHeaders(headers)); + + UNIT_ASSERT_EQUAL(&resp, &resp.SetContent("some stuff")); + UNIT_ASSERT_EQUAL(&resp, &resp.SetContent("some other stuff").SetContentType("text/plain")); + } + + 
Y_UNIT_TEST(TestSetContentType) { + const char* EXPECTED = "HTTP/1.1 200 Ok\r\n" + "Content-Type: text/xml\r\n" + "Content-Length: 28\r\n" + "\r\n" + "<xml><tag value=\"1\" /></xml>"; + THttpResponse resp; + resp.SetContent("<xml><tag value=\"1\" /></xml>") + .SetContentType("application/json") + .SetContentType("text/xml"); + UNIT_ASSERT_STRINGS_EQUAL(ToString(resp), EXPECTED); + } +} diff --git a/library/cpp/http/server/ut/ya.make b/library/cpp/http/server/ut/ya.make new file mode 100644 index 0000000000..bcb4d4c0b8 --- /dev/null +++ b/library/cpp/http/server/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/http/server) + +OWNER(pg) + +SIZE(MEDIUM) + +SRCS( + http_ut.cpp + response_ut.cpp +) + +END() diff --git a/library/cpp/http/server/ya.make b/library/cpp/http/server/ya.make new file mode 100644 index 0000000000..bae6f33306 --- /dev/null +++ b/library/cpp/http/server/ya.make @@ -0,0 +1,27 @@ +LIBRARY() + +OWNER( + pg + mvel + kulikov + g:base + g:middle +) + +SRCS( + conn.cpp + http.cpp + http_ex.cpp + options.cpp + response.cpp +) + +PEERDIR( + library/cpp/http/misc + library/cpp/http/io + library/cpp/threading/equeue +) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/library/cpp/http/ya.make b/library/cpp/http/ya.make new file mode 100644 index 0000000000..fa2d1edef6 --- /dev/null +++ b/library/cpp/http/ya.make @@ -0,0 +1,25 @@ +RECURSE( + client + client/cookies + cookies + coro + examples + fetch + fetch_gpl + io + io/fuzz + io/list_codings + misc + multipart + push_parser + server + simple + static +) + +IF (NOT OS_WINDOWS) + RECURSE_FOR_TESTS( + io/ut + io/ut/medium + ) +ENDIF() |