diff options
author | lapshov <lapshov@yandex-team.ru> | 2022-02-10 16:49:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:39 +0300 |
commit | 4f36f44b1e216dca1f44ada8d126e7b70f05da2f (patch) | |
tree | edbdb67ebe4b9195bf7c53bbc0a963fc03ccc17e /library/cpp/http/fetch | |
parent | 8b71ce88bea710a9663bb143e4916f961c57212e (diff) | |
download | ydb-4f36f44b1e216dca1f44ada8d126e7b70f05da2f.tar.gz |
Restoring authorship annotation for <lapshov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
-rw-r--r-- | library/cpp/http/fetch/exthttpcodes.cpp | 20 | ||||
-rw-r--r-- | library/cpp/http/fetch/exthttpcodes.h | 4 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfetcher.h | 6 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm.h | 4 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 230 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm_ut.cpp | 110 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpheader.h | 34 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpparser.h | 4 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpzreader.h | 38 | ||||
-rw-r--r-- | library/cpp/http/fetch/ut/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/http/fetch/ya.make | 4 |
11 files changed, 228 insertions, 228 deletions
diff --git a/library/cpp/http/fetch/exthttpcodes.cpp b/library/cpp/http/fetch/exthttpcodes.cpp index acc05650c8..ce9d683412 100644 --- a/library/cpp/http/fetch/exthttpcodes.cpp +++ b/library/cpp/http/fetch/exthttpcodes.cpp @@ -158,8 +158,8 @@ static ui16* prepare_flags(http_flag* arg) { // для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их // всем кодам не перечисленным в таблице выше - for (size_t group = 0; group < 1000; group += 100) - for (size_t j = group + 1; j < group + 100; ++j) + for (size_t group = 0; group < 1000; group += 100) + for (size_t j = group + 1; j < group + 100; ++j) flags[j] = flags[group]; // предыдущий цикл затер некоторые флаги перечисленные в таблице выше @@ -171,12 +171,12 @@ static ui16* prepare_flags(http_flag* arg) { } ui16* http2status = prepare_flags(HTTP_FLAG); - + TStringBuf ExtHttpCodeStr(int code) noexcept { - if (code < HTTP_CODE_MAX) { + if (code < HTTP_CODE_MAX) { return HttpCodeStr(code); - } - switch (code) { + } + switch (code) { case HTTP_BAD_RESPONSE_HEADER: return TStringBuf("Bad response header"); case HTTP_CONNECTION_LOST: @@ -257,10 +257,10 @@ TStringBuf ExtHttpCodeStr(int code) noexcept { return TStringBuf("Cached copy for the url is not available"); case HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING: return TStringBuf("Timed out while bytes receiving"); - + // TODO: messages for >2000 codes - + default: return TStringBuf("Unknown HTTP code"); - } -} + } +} diff --git a/library/cpp/http/fetch/exthttpcodes.h b/library/cpp/http/fetch/exthttpcodes.h index 6b525052cd..a6a0aa31be 100644 --- a/library/cpp/http/fetch/exthttpcodes.h +++ b/library/cpp/http/fetch/exthttpcodes.h @@ -4,7 +4,7 @@ #include <library/cpp/http/misc/httpcodes.h> enum ExtHttpCodes { - // Custom + // Custom HTTP_EXTENDED = 1000, HTTP_BAD_RESPONSE_HEADER = 1000, HTTP_CONNECTION_LOST = 1001, @@ -137,5 +137,5 @@ static inline int Http2Status(int code) { extern ui16* http2status; return http2status[code & (EXT_HTTP_CODE_MAX - 1)]; } - + TStringBuf ExtHttpCodeStr(int code) noexcept; diff --git a/library/cpp/http/fetch/httpfetcher.h b/library/cpp/http/fetch/httpfetcher.h index 7fc251afd2..8e1efdc0c2 100644 --- a/library/cpp/http/fetch/httpfetcher.h +++ b/library/cpp/http/fetch/httpfetcher.h @@ -52,7 +52,7 @@ public: THttpParser<TCheck>::Init(header, head_request); const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme()); - size_t schemelen = strlen(scheme); + size_t schemelen = strlen(scheme); if (*path == '/') { header->base = TStringBuf(scheme, schemelen); header->base += TStringBuf("://", 3); @@ -123,8 +123,8 @@ public: header->error = HTTP_HEADER_TOO_LARGE; break; } - } - if (!inheader) { + } + if (!inheader) { maxsize = TCheck::GetMaxBodySize(header); } if (header->http_status >= HTTP_EXTENDED) diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h index c4abdcd0d2..9d4262f88f 100644 --- a/library/cpp/http/fetch/httpfsm.h +++ b/library/cpp/http/fetch/httpfsm.h @@ -5,8 +5,8 @@ #include <util/system/maxlen.h> #include <util/datetime/parser.h> -#include <time.h> - +#include <time.h> + struct THttpHeaderParser { static constexpr int ErrFirstlineTypeMismatch = -3; static constexpr int ErrHeader = -2; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index eab0328b18..79fc390efb 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -128,8 +128,8 @@ connection = "connection"i def %beg_connection c_tokenlist eoh %set_connect ################# content-encoding ################ action beg_content_encoding { I = HTTP_COMPRESSION_ERROR; } action set_content_encoding { base_hd->compression_method = - ((base_hd->compression_method == HTTP_COMPRESSION_UNSET || - base_hd->compression_method == I) ? + ((base_hd->compression_method == HTTP_COMPRESSION_UNSET || + base_hd->compression_method == I) ? I : (int)HTTP_COMPRESSION_ERROR); } ce_tokenlist = "identity"i %{c(HTTP_COMPRESSION_IDENTITY)} @@ -189,49 +189,49 @@ action set_charset { } } -mime_type = "text/plain"i %{c(MIME_TEXT)} - | "text/html"i %{c(MIME_HTML)} - | "application/pdf"i %{c(MIME_PDF)} - | "application/rtf"i %{c(MIME_RTF)} - | "text/rtf"i %{c(MIME_RTF)} - | "application/msword"i %{c(MIME_DOC)} - | "audio/mpeg"i %{c(MIME_MPEG)} - | "text/xml"i %{c(MIME_XML)} - | "application/xml"i %{c(MIME_XML)} - | "application/rss+xml"i %{c(MIME_RSS)} - | "application/rdf+xml"i %{c(MIME_RSS)} - | "application/atom+xml"i %{c(MIME_RSS)} - | "text/vnd.wap.wml"i %{c(MIME_WML)} - | "application/x-shockwave-flash"i %{c(MIME_SWF)} - | "application/vnd.ms-excel"i %{c(MIME_XLS)} - | "application/vnd.ms-powerpoint"i %{c(MIME_PPT)} - | "image/jpeg"i %{c(MIME_IMAGE_JPG)} - | "image/jpg"i %{c(MIME_IMAGE_JPG)} - | "image/pjpeg"i %{c(MIME_IMAGE_PJPG)} - | "image/png"i %{c(MIME_IMAGE_PNG)} - | "image/gif"i %{c(MIME_IMAGE_GIF)} - | "application/xhtml+xml"i %{c(MIME_XHTMLXML)} - | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"i %{c(MIME_DOCX)} - | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"i %{c(MIME_XLSX)} - | "application/vnd.openxmlformats-officedocument.presentationml.presentation"i %{c(MIME_PPTX)} - | "application/vnd.oasis.opendocument.text"i %{c(MIME_ODT)} - | "application/vnd.oasis.opendocument.presentation"i %{c(MIME_ODP)} - | "application/vnd.oasis.opendocument.spreadsheet"i %{c(MIME_ODS)} - | "application/vnd.oasis.opendocument.graphics"i %{c(MIME_ODG)} - | "image/x-ms-bmp"i %{c(MIME_IMAGE_BMP)} - | "image/bmp"i %{c(MIME_IMAGE_BMP)} - | "audio/x-wav"i %{c(MIME_WAV)} - | ( "application/x-tar"i | "application/x-ustar"i | "application/x-gtar"i | "application/zip"i | "application/x-archive"i - | "application/x-bzip2"i | "application/x-rar"i ) %{c(MIME_ARCHIVE)} - | "application/x-dosexec"i %{c(MIME_EXE)} +mime_type = "text/plain"i %{c(MIME_TEXT)} + | "text/html"i %{c(MIME_HTML)} + | "application/pdf"i %{c(MIME_PDF)} + | "application/rtf"i %{c(MIME_RTF)} + | "text/rtf"i %{c(MIME_RTF)} + | "application/msword"i %{c(MIME_DOC)} + | "audio/mpeg"i %{c(MIME_MPEG)} + | "text/xml"i %{c(MIME_XML)} + | "application/xml"i %{c(MIME_XML)} + | "application/rss+xml"i %{c(MIME_RSS)} + | "application/rdf+xml"i %{c(MIME_RSS)} + | "application/atom+xml"i %{c(MIME_RSS)} + | "text/vnd.wap.wml"i %{c(MIME_WML)} + | "application/x-shockwave-flash"i %{c(MIME_SWF)} + | "application/vnd.ms-excel"i %{c(MIME_XLS)} + | "application/vnd.ms-powerpoint"i %{c(MIME_PPT)} + | "image/jpeg"i %{c(MIME_IMAGE_JPG)} + | "image/jpg"i %{c(MIME_IMAGE_JPG)} + | "image/pjpeg"i %{c(MIME_IMAGE_PJPG)} + | "image/png"i %{c(MIME_IMAGE_PNG)} + | "image/gif"i %{c(MIME_IMAGE_GIF)} + | "application/xhtml+xml"i %{c(MIME_XHTMLXML)} + | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"i %{c(MIME_DOCX)} + | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"i %{c(MIME_XLSX)} + | "application/vnd.openxmlformats-officedocument.presentationml.presentation"i %{c(MIME_PPTX)} + | "application/vnd.oasis.opendocument.text"i %{c(MIME_ODT)} + | "application/vnd.oasis.opendocument.presentation"i %{c(MIME_ODP)} + | "application/vnd.oasis.opendocument.spreadsheet"i %{c(MIME_ODS)} + | "application/vnd.oasis.opendocument.graphics"i %{c(MIME_ODG)} + | "image/x-ms-bmp"i %{c(MIME_IMAGE_BMP)} + | "image/bmp"i %{c(MIME_IMAGE_BMP)} + | "audio/x-wav"i %{c(MIME_WAV)} + | ( "application/x-tar"i | "application/x-ustar"i | "application/x-gtar"i | "application/zip"i | "application/x-archive"i + | "application/x-bzip2"i | "application/x-rar"i ) %{c(MIME_ARCHIVE)} + | "application/x-dosexec"i %{c(MIME_EXE)} | "application/x-gzip"i %{c(MIME_GZIP)} | "application/json"i %{c(MIME_JSON)} | ("application/javascript"i | "text/javascript"i) %{c(MIME_JAVASCRIPT)} | "application/vnd.android.package-archive"i %{c(MIME_APK)} | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)} ; - - + + charset_name = token_char+ >clear_buf $update_buf; mime_param = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2 | token ws* '=' ws* '"'? token '"'? @1 @@ -249,9 +249,9 @@ last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime; ################# location ######################## action set_location { - while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) { - buflen --; - } + while (buflen > 0 && (buf[buflen - 1] == ' ' || buf[buflen - 1] == '\t')) { + buflen --; + } if (hd && buflen < FETCHER_URL_MAX) { hd->location = TStringBuf(buf, buflen); } @@ -259,34 +259,34 @@ action set_location { action set_status_303{ if (hd) hd->http_status = 303; } -url = url_char+ >clear_buf $update_buf; -loc_url = any_text_char+ >clear_buf $update_buf; -location = "location"i def loc_url eoh %set_location; +url = url_char+ >clear_buf $update_buf; +loc_url = any_text_char+ >clear_buf $update_buf; +location = "location"i def loc_url eoh %set_location; refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; ################# x-robots-tag ################ action set_x_robots { if (hd && AcceptingXRobots) { - if (I > 0) - hd->x_robots_tag |= I; - - int pos = (I > 0 ? I : -I); - for (size_t i = 0; i < 5; ++i) - if (abs(pos) & (1 << i)) // permissive flags take priority - hd->x_robots_state[i] = (I < 0) ? '1' : (hd->x_robots_state[i] != '1') ? '0' : '1'; - } + if (I > 0) + hd->x_robots_tag |= I; + + int pos = (I > 0 ? I : -I); + for (size_t i = 0; i < 5; ++i) + if (abs(pos) & (1 << i)) // permissive flags take priority + hd->x_robots_state[i] = (I < 0) ? '1' : (hd->x_robots_state[i] != '1') ? '0' : '1'; + } } action accept_x_robots { AcceptingXRobots = (bool)I; } -x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)} - | "noindex"i %{c(1)} | "index"i %{c(-1)} - | "nofollow"i %{c(2)} | "follow"i %{c(-2)} - | "noarchive"i %{c(4)} | "archive"i %{c(-4)} +x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)} + | "noindex"i %{c(1)} | "index"i %{c(-1)} + | "nofollow"i %{c(2)} | "follow"i %{c(-2)} + | "noarchive"i %{c(4)} | "archive"i %{c(-4)} | "noyaca"i %{c(16)} - | "noodp"i %{c(8)}; + | "noodp"i %{c(8)}; any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*; any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*; @@ -311,12 +311,12 @@ action set_canonical { rel_canonical = "link"i def '<' url ">;"i lws "rel"i lws '=' lws "\"canonical\"" eoh %set_canonical; ################# hreflang ############### action set_hreflang { - bool first = (hreflangpos == hd->hreflangs); + bool first = (hreflangpos == hd->hreflangs); size_t len2 = (first ? 0 : 1) + langlen + 1 + buflen; if (langlen && len2 < hreflangspace) { - if (!first) { - *(hreflangpos++) = '\t'; - } + if (!first) { + *(hreflangpos++) = '\t'; + } memcpy(hreflangpos, langstart, langlen); hreflangpos += langlen; *(hreflangpos++) = ' '; @@ -342,13 +342,13 @@ hreflang = "link"i def '<' url '>' lws ";" lws ( ( "rel"i lws '=' lws quote "alternate" quote lws ';' lws "hreflang"i lws '=' lws quote lang quote ) | ( "hreflang"i lws '=' lws quote lang quote lws ';' lws "rel"i lws '=' lws quote "alternate" quote ) ) eoh %set_hreflang; -################# squid_error ################# -action set_squid_error { - hd->squid_error = 1; -} - -squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; - +################# squid_error ################# +action set_squid_error { + hd->squid_error = 1; +} + +squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error; + ################# auth ######################## action init_auth { if (auth_hd) @@ -441,20 +441,20 @@ action set_user_agent { user_agent = any_text_char* >clear_buf $update_buf; user_agent_header = "user-agent"i def user_agent eoh %set_user_agent; -############### x-yandex-langregion ################ -action set_langregion { - if (request_hd && buflen < MAX_LANGREGION_LEN) { - buf[buflen++] = 0; - if (request_hd->x_yandex_langregion[0] != 0) { - return -2; - } - memcpy(request_hd->x_yandex_langregion, buf, buflen); - } -} - -langregion = any_text_char* >clear_buf $update_buf; -langregion_header = "x-yandex-langregion"i def langregion eoh %set_langregion; - +############### x-yandex-langregion ################ +action set_langregion { + if (request_hd && buflen < MAX_LANGREGION_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_langregion[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_langregion, buf, buflen); + } +} + +langregion = any_text_char* >clear_buf $update_buf; +langregion_header = "x-yandex-langregion"i def langregion eoh %set_langregion; + ############### x-yandex-sourcename ################ action set_sourcename { if (request_hd && buflen < MAXWORD_LEN) { @@ -469,34 +469,34 @@ action set_sourcename { sourcename = any_text_char* >clear_buf $update_buf; sourcename_header = "x-yandex-sourcename"i def sourcename eoh %set_sourcename; -############### x-yandex-requesttype ############### -action set_requesttype { - if (request_hd && buflen < MAXWORD_LEN) { - buf[buflen++] = 0; - if (request_hd->x_yandex_requesttype[0] != 0) { - return -2; - } - memcpy(request_hd->x_yandex_requesttype, buf, buflen); - } -} - -requesttype = any_text_char* >clear_buf $update_buf; -requesttype_header = "x-yandex-requesttype"i def requesttype eoh %set_requesttype; - -################ x-yandex-fetchoptions ############### -action set_fetchoptions { - if (request_hd && buflen < MAXWORD_LEN) { - buf[buflen++] = 0; - if (request_hd->x_yandex_fetchoptions[0] != 0) { - return -2; - } - memcpy(request_hd->x_yandex_fetchoptions, buf, buflen); - } -} - -fetchoptions = any_text_char* >clear_buf $update_buf; -fetchoptions_header = "x-yandex-fetchoptions"i def fetchoptions eoh %set_fetchoptions; - +############### x-yandex-requesttype ############### +action set_requesttype { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_requesttype[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_requesttype, buf, buflen); + } +} + +requesttype = any_text_char* >clear_buf $update_buf; +requesttype_header = "x-yandex-requesttype"i def requesttype eoh %set_requesttype; + +################ x-yandex-fetchoptions ############### +action set_fetchoptions { + if (request_hd && buflen < MAXWORD_LEN) { + buf[buflen++] = 0; + if (request_hd->x_yandex_fetchoptions[0] != 0) { + return -2; + } + memcpy(request_hd->x_yandex_fetchoptions, buf, buflen); + } +} + +fetchoptions = any_text_char* >clear_buf $update_buf; +fetchoptions_header = "x-yandex-fetchoptions"i def fetchoptions eoh %set_fetchoptions; + ################ if-modified-since ################ action set_if_modified_since { if (request_hd) { @@ -576,7 +576,7 @@ message_header = other_header $0 response_header = message_header $0 | auth @1 | accept_ranges @1 - | location @1 + | location @1 | x_robots_tag @1 | rel_canonical @1 | hreflang @1 @@ -588,9 +588,9 @@ request_header = message_header $0 | host_header @1 | user_agent_header @1 | sourcename_header @1 - | requesttype_header @1 - | langregion_header @1 - | fetchoptions_header @1 + | requesttype_header @1 + | langregion_header @1 + | fetchoptions_header @1 | if_modified_since @1 | request_cache_control @1 | response_timeout @1 diff --git a/library/cpp/http/fetch/httpfsm_ut.cpp b/library/cpp/http/fetch/httpfsm_ut.cpp index b018e80101..05fede5a02 100644 --- a/library/cpp/http/fetch/httpfsm_ut.cpp +++ b/library/cpp/http/fetch/httpfsm_ut.cpp @@ -17,19 +17,19 @@ class THttpHeaderParserTestSuite: public TTestBase { UNIT_TEST(TestLastModifiedCorrupted); UNIT_TEST(TestResponseHeaderOnRequest); UNIT_TEST(TestRequestHeaderOnResponse); - UNIT_TEST(TestXRobotsTagUnknownTags); + UNIT_TEST(TestXRobotsTagUnknownTags); UNIT_TEST(TestXRobotsTagMyBot); UNIT_TEST(TestXRobotsTagOtherBot); UNIT_TEST(TestXRobotsTagUnavailableAfterAware); UNIT_TEST(TestXRobotsTagUnavailableAfterWorks); - UNIT_TEST(TestXRobotsTagOverridePriority); + UNIT_TEST(TestXRobotsTagOverridePriority); UNIT_TEST(TestXRobotsTagDoesNotBreakCharset); UNIT_TEST(TestXRobotsTagAllowsMultiline); UNIT_TEST(TestRelCanonical); UNIT_TEST(TestHreflang); UNIT_TEST(TestHreflangOnLongInput); UNIT_TEST(TestMimeType); - UNIT_TEST(TestRepeatedContentEncoding); + UNIT_TEST(TestRepeatedContentEncoding); UNIT_TEST_SUITE_END(); private: @@ -49,19 +49,19 @@ public: void TestLastModifiedCorrupted(); void TestResponseHeaderOnRequest(); void TestRequestHeaderOnResponse(); - void TestXRobotsTagUnknownTags(); + void TestXRobotsTagUnknownTags(); void TestXRobotsTagMyBot(); void TestXRobotsTagOtherBot(); void TestXRobotsTagUnavailableAfterAware(); void TestXRobotsTagUnavailableAfterWorks(); - void TestXRobotsTagOverridePriority(); + void TestXRobotsTagOverridePriority(); void TestXRobotsTagDoesNotBreakCharset(); void TestXRobotsTagAllowsMultiline(); void TestRelCanonical(); void TestHreflang(); void TestHreflangOnLongInput(); void TestMimeType(); - void TestRepeatedContentEncoding(); + void TestRepeatedContentEncoding(); }; void THttpHeaderParserTestSuite::TestStart() { @@ -91,8 +91,8 @@ void THttpHeaderParserTestSuite::TestRequestHeader() { UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority, DEFAULT_REQUEST_PRIORITY); UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, ""), 0); - UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, ""), 0); - UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, ""), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, ""), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, ""), 0); TestFinish(); UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, DEFAULT_MAX_AGE); } @@ -161,8 +161,8 @@ void THttpHeaderParserTestSuite::TestProxyRequestHeader() { "X-Yandex-Response-Timeout: 1000\r\n" "X-Yandex-Request-Priority: 2\r\n" "X-Yandex-Sourcename: orange\r\n" - "X-Yandex-Requesttype: userproxy\r\n" - "X-Yandex-FetchOptions: d;c\r\n" + "X-Yandex-Requesttype: userproxy\r\n" + "X-Yandex-FetchOptions: d;c\r\n" "Cache-control: max-age=100\r\n" "If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT\r\n" "User-Agent: Yandex/1.01.001 (compatible; Win16; I)\r\n" @@ -172,9 +172,9 @@ void THttpHeaderParserTestSuite::TestProxyRequestHeader() { UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET); UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_response_timeout, 1000); UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority, 2); - UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, "orange"), 0); - UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, "userproxy"), 0); - UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, "d;c"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, "orange"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, "userproxy"), 0); + UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, "d;c"), 0); UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, 100); UNIT_ASSERT_VALUES_EQUAL(httpRequestHeader.if_modified_since, TInstant::ParseIso8601Deprecated("1994-10-29 19:43:31Z").TimeT()); @@ -231,7 +231,7 @@ void THttpHeaderParserTestSuite::TestLastModifiedCorrupted() { TestFinish(); } -void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() { +void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() { TestStart(); THttpHeader httpHeader; httpHeaderParser->Init(&httpHeader); @@ -312,21 +312,21 @@ void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() { TestFinish(); } -void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char* headers = - "HTTP/1.1 200 OK\r\n" - "Content-Type: text/html\r\n" - "x-robots-tag: all, none\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "11xxx"); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); // NOTE legacy behavior, should be 0 as `all` overrides - TestFinish(); -} - +void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: all, none\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "11xxx"); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); // NOTE legacy behavior, should be 0 as `all` overrides + TestFinish(); +} + void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() { TestStart(); THttpHeader httpHeader; @@ -394,7 +394,7 @@ void THttpHeaderParserTestSuite::TestHreflang() { i32 result = httpHeaderParser->Execute(headers, strlen(headers)); UNIT_ASSERT_VALUES_EQUAL(result, 2); // UNIT_ASSERT_VALUES_EQUAL(strcmp(httpHeader.hreflangs, "x-default http://www.high.ru/;"), 0); - UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, "x-default http://www.high.ru/\ten_GB http://www.high.ru/en.html\tru_RU.KOI8-r http://www.high.ru/ru.html"); + UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, "x-default http://www.high.ru/\ten_GB http://www.high.ru/en.html\tru_RU.KOI8-r http://www.high.ru/ru.html"); TestFinish(); } @@ -461,31 +461,31 @@ void THttpHeaderParserTestSuite::TestMimeType() { TestFinish(); } -void THttpHeaderParserTestSuite::TestRepeatedContentEncoding() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char *headers = - "HTTP/1.1 200 OK\r\n" - "Server: nginx\r\n" - "Date: Mon, 15 Oct 2018 10:40:44 GMT\r\n" - "Content-Type: text/plain\r\n" - "Transfer-Encoding: chunked\r\n" - "Connection: keep-alive\r\n" - "Last-Modified: Mon, 15 Oct 2018 03:48:54 GMT\r\n" - "ETag: W/\"5bc40e26-a956d\"\r\n" - "X-Autoru-LB: lb-03-sas.prod.vertis.yandex.net\r\n" - "Content-Encoding: gzip\r\n" - "Content-Encoding: gzip\r\n" - "X-UA-Bot: 1\r\n" - "\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.error, 0); - UNIT_ASSERT_EQUAL(httpHeader.compression_method, 3); - TestFinish(); -} - +void THttpHeaderParserTestSuite::TestRepeatedContentEncoding() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char *headers = + "HTTP/1.1 200 OK\r\n" + "Server: nginx\r\n" + "Date: Mon, 15 Oct 2018 10:40:44 GMT\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "Connection: keep-alive\r\n" + "Last-Modified: Mon, 15 Oct 2018 03:48:54 GMT\r\n" + "ETag: W/\"5bc40e26-a956d\"\r\n" + "X-Autoru-LB: lb-03-sas.prod.vertis.yandex.net\r\n" + "Content-Encoding: gzip\r\n" + "Content-Encoding: gzip\r\n" + "X-UA-Bot: 1\r\n" + "\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.error, 0); + UNIT_ASSERT_EQUAL(httpHeader.compression_method, 3); + TestFinish(); +} + UNIT_TEST_SUITE_REGISTRATION(THttpHeaderParserTestSuite); Y_UNIT_TEST_SUITE(TestHttpChunkParser) { diff --git a/library/cpp/http/fetch/httpheader.h b/library/cpp/http/fetch/httpheader.h index b2810bbd41..ef4519ead4 100644 --- a/library/cpp/http/fetch/httpheader.h +++ b/library/cpp/http/fetch/httpheader.h @@ -5,9 +5,9 @@ #include <library/cpp/mime/types/mime.h> #include <util/system/defaults.h> -#include <util/system/compat.h> +#include <util/system/compat.h> #include <util/generic/string.h> -#include <util/generic/ylimits.h> +#include <util/generic/ylimits.h> #include <util/system/maxlen.h> #include <ctime> @@ -26,9 +26,9 @@ extern const i8 DEFAULT_REQUEST_PRIORITY; /// == -1 extern const i32 DEFAULT_RESPONSE_TIMEOUT; /// == -1 #define HTTP_PREFIX "http://" -#define MAX_LANGREGION_LEN 4 +#define MAX_LANGREGION_LEN 4 #define MAXWORD_LEN 55 - + enum HTTP_COMPRESSION { HTTP_COMPRESSION_UNSET = 0, HTTP_COMPRESSION_ERROR = 1, @@ -105,9 +105,9 @@ public: printf("compression_method: %" PRIi8 "\n", compression_method); printf("transfer_chunked: %" PRIi8 "\n", transfer_chunked); printf("connection_closed: %" PRIi8 "\n", connection_closed); - printf("content_range_start: %" PRIi64 "\n", content_range_start); - printf("content_range_end: %" PRIi64 "\n", content_range_end); - printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length); + printf("content_range_start: %" PRIi64 "\n", content_range_start); + printf("content_range_end: %" PRIi64 "\n", content_range_end); + printf("content_range_entity_length: %" PRIi64 "\n", content_range_entity_length); printf("base: \"%s\"\n", base.c_str()); printf("error: %" PRIi16 "\n", error); } @@ -132,26 +132,26 @@ struct THttpHeader: public THttpBaseHeader { public: i8 accept_ranges; i8 squid_error; - i8 x_robots_tag; // deprecated, use x_robots_state instead + i8 x_robots_tag; // deprecated, use x_robots_state instead i16 http_status; TString location; TString rel_canonical; char hreflangs[HREFLANG_MAX]; i64 retry_after; - TString x_robots_state; // 'xxxxx' format, see `library/html/zoneconf/parsefunc.cpp` + TString x_robots_state; // 'xxxxx' format, see `library/html/zoneconf/parsefunc.cpp` public: void Init() { THttpBaseHeader::Init(); accept_ranges = -1; - squid_error = 0; + squid_error = 0; x_robots_tag = 0; rel_canonical.clear(); http_status = -1; location.clear(); hreflangs[0] = 0; retry_after = DEFAULT_RETRY_AFTER; - x_robots_state = "xxxxx"; + x_robots_state = "xxxxx"; } void Print() const { @@ -170,10 +170,10 @@ public: char host[HOST_MAX]; char from[MAXWORD_LEN]; char user_agent[MAXWORD_LEN]; - char x_yandex_langregion[MAX_LANGREGION_LEN]; + char x_yandex_langregion[MAX_LANGREGION_LEN]; char x_yandex_sourcename[MAXWORD_LEN]; - char x_yandex_requesttype[MAXWORD_LEN]; - char x_yandex_fetchoptions[MAXWORD_LEN]; + char x_yandex_requesttype[MAXWORD_LEN]; + char x_yandex_fetchoptions[MAXWORD_LEN]; i8 http_method; i8 x_yandex_request_priority; i32 x_yandex_response_timeout; @@ -190,10 +190,10 @@ public: host[0] = 0; from[0] = 0; user_agent[0] = 0; - x_yandex_langregion[0] = 0; + x_yandex_langregion[0] = 0; x_yandex_sourcename[0] = 0; - x_yandex_requesttype[0] = 0; - x_yandex_fetchoptions[0] = 0; + x_yandex_requesttype[0] = 0; + x_yandex_fetchoptions[0] = 0; http_method = HTTP_METHOD_UNDEFINED; x_yandex_request_priority = DEFAULT_REQUEST_PRIORITY; x_yandex_response_timeout = DEFAULT_RESPONSE_TIMEOUT; diff --git a/library/cpp/http/fetch/httpparser.h b/library/cpp/http/fetch/httpparser.h index 769828e4ae..25a5b4385c 100644 --- a/library/cpp/http/fetch/httpparser.h +++ b/library/cpp/http/fetch/httpparser.h @@ -4,7 +4,7 @@ #include "httpheader.h" #include <library/cpp/mime/types/mime.h> -#include <util/system/yassert.h> +#include <util/system/yassert.h> #include <library/cpp/http/misc/httpcodes.h> template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20> @@ -294,7 +294,7 @@ public: Header = H; Eoferr = 1; Size = 0; - AssumeConnectionClosed = assumeConnectionClosed; + AssumeConnectionClosed = assumeConnectionClosed; HeadRequest = headRequest; return parsHeader ? ParseHeader() : SkipHeader(); } diff --git a/library/cpp/http/fetch/httpzreader.h b/library/cpp/http/fetch/httpzreader.h index 68eb00853d..fcb95baa56 100644 --- a/library/cpp/http/fetch/httpzreader.h +++ b/library/cpp/http/fetch/httpzreader.h @@ -4,9 +4,9 @@ #include "httpparser.h" #include "exthttpcodes.h" -#include <util/system/defaults.h> -#include <util/generic/yexception.h> - +#include <util/system/defaults.h> +#include <util/generic/yexception.h> + #include <contrib/libs/zlib/zlib.h> #include <errno.h> @@ -35,8 +35,8 @@ public: , MaxContSize(0) , Buf(nullptr) , ZErr(0) - , ConnectionClosed(0) - , IgnoreTrailingGarbage(true) + , ConnectionClosed(0) + , IgnoreTrailingGarbage(true) { memset(&Stream, 0, sizeof(Stream)); } @@ -50,14 +50,14 @@ public: } } - void SetConnectionClosed(int cc) { - ConnectionClosed = cc; - } - - void SetIgnoreTrailingGarbage(bool ignore) { - IgnoreTrailingGarbage = ignore; - } - + void SetConnectionClosed(int cc) { + ConnectionClosed = cc; + } + + void SetIgnoreTrailingGarbage(bool ignore) { + IgnoreTrailingGarbage = ignore; + } + int Init( THttpHeader* H, int parsHeader, @@ -122,11 +122,11 @@ public: return -1; } if (!IgnoreTrailingGarbage && BufSize == Stream.avail_out && Stream.avail_in > 0) { - Header->error = EXT_HTTP_GZIPERROR; - ZErr = EFAULT; + Header->error = EXT_HTTP_GZIPERROR; + ZErr = EFAULT; Stream.msg = (char*)"trailing garbage"; - return -1; - } + return -1; + } return long(BufSize - Stream.avail_out); case Z_NEED_DICT: @@ -236,8 +236,8 @@ protected: size_t CurContSize, MaxContSize; ui8* Buf; int ZErr; - int ConnectionClosed; - bool IgnoreTrailingGarbage; + int ConnectionClosed; + bool IgnoreTrailingGarbage; }; class zlib_exception: public yexception { diff --git a/library/cpp/http/fetch/ut/ya.make b/library/cpp/http/fetch/ut/ya.make index 7486986b36..42392f2ee0 100644 --- a/library/cpp/http/fetch/ut/ya.make +++ b/library/cpp/http/fetch/ut/ya.make @@ -1,7 +1,7 @@ UNITTEST_FOR(library/cpp/http/fetch) OWNER( - g:zora + g:zora ) SRCS( diff --git a/library/cpp/http/fetch/ya.make b/library/cpp/http/fetch/ya.make index 7737127463..99cf53da03 100644 --- a/library/cpp/http/fetch/ya.make +++ b/library/cpp/http/fetch/ya.make @@ -1,7 +1,7 @@ LIBRARY() OWNER( - g:zora + g:zora ) PEERDIR( @@ -26,7 +26,7 @@ SRCS( httpheader.h httpparser.h httpzreader.h - sockhandler.h + sockhandler.h ) GENERATE_ENUM_SERIALIZATION(httpheader.h) |