diff options
author | marakasov <marakasov@yandex-team.ru> | 2022-02-10 16:49:50 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:50 +0300 |
commit | 359de4829c1d82b06fad5e0ad82470e4e9f27c89 (patch) | |
tree | a58ba6d37e60106215ea04536f6f33d3e2fe4f6f /library/cpp/http/fetch | |
parent | d94c2eed82b3c1259ac7320eb28e9731cd990c22 (diff) | |
download | ydb-359de4829c1d82b06fad5e0ad82470e4e9f27c89.tar.gz |
Restoring authorship annotation for <marakasov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
-rw-r--r-- | library/cpp/http/fetch/httpfsm.h | 4 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm.rl6 | 42 | ||||
-rw-r--r-- | library/cpp/http/fetch/httpfsm_ut.cpp | 268 |
3 files changed, 157 insertions, 157 deletions
diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h index c4abdcd0d2..e1b109d26f 100644 --- a/library/cpp/http/fetch/httpfsm.h +++ b/library/cpp/http/fetch/httpfsm.h @@ -58,8 +58,8 @@ struct THttpHeaderParser { char* hreflangpos; size_t hreflangspace; - bool AcceptingXRobots; - + bool AcceptingXRobots; + THttpAuthHeader* auth_hd; THttpRequestHeader* request_hd; diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6 index eab0328b18..e2447a6dfa 100644 --- a/library/cpp/http/fetch/httpfsm.rl6 +++ b/library/cpp/http/fetch/httpfsm.rl6 @@ -227,9 +227,9 @@ mime_type = "text/plain"i %{c(MIME_TEXT)} | "application/x-gzip"i %{c(MIME_GZIP)} | "application/json"i %{c(MIME_JSON)} | ("application/javascript"i | "text/javascript"i) %{c(MIME_JAVASCRIPT)} - | "application/vnd.android.package-archive"i %{c(MIME_APK)} - | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)} - ; + | "application/vnd.android.package-archive"i %{c(MIME_APK)} + | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)} + ; charset_name = token_char+ >clear_buf $update_buf; @@ -266,7 +266,7 @@ refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location; ################# x-robots-tag ################ action set_x_robots { - if (hd && AcceptingXRobots) { + if (hd && AcceptingXRobots) { if (I > 0) hd->x_robots_tag |= I; @@ -277,10 +277,10 @@ action set_x_robots { } } -action accept_x_robots { - AcceptingXRobots = (bool)I; -} - +action accept_x_robots { + AcceptingXRobots = (bool)I; +} + x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)} | "noindex"i %{c(1)} | "index"i %{c(-1)} | "nofollow"i %{c(2)} | "follow"i %{c(-2)} @@ -288,19 +288,19 @@ x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)} | "noyaca"i %{c(16)} | "noodp"i %{c(8)}; -any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*; -any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*; - -unavailable_after_directive = "unavailable_after"i def any_value; - -yandex_robot = "yandex"i | "yandexbot"i; -other_robot = any_key - "unavailable_after"i - yandex_robot; -robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)}; - -x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? ); - -x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh; - +any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*; +any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*; + +unavailable_after_directive = "unavailable_after"i def any_value; + +yandex_robot = "yandex"i | "yandexbot"i; +other_robot = any_key - "unavailable_after"i - yandex_robot; +robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)}; + +x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? ); + +x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh; + ################# rel_canonical ############### action set_canonical { if (hd && buflen < FETCHER_URL_MAX) { diff --git a/library/cpp/http/fetch/httpfsm_ut.cpp b/library/cpp/http/fetch/httpfsm_ut.cpp index b018e80101..b1904be14c 100644 --- a/library/cpp/http/fetch/httpfsm_ut.cpp +++ b/library/cpp/http/fetch/httpfsm_ut.cpp @@ -18,13 +18,13 @@ class THttpHeaderParserTestSuite: public TTestBase { UNIT_TEST(TestResponseHeaderOnRequest); UNIT_TEST(TestRequestHeaderOnResponse); UNIT_TEST(TestXRobotsTagUnknownTags); - UNIT_TEST(TestXRobotsTagMyBot); - UNIT_TEST(TestXRobotsTagOtherBot); - UNIT_TEST(TestXRobotsTagUnavailableAfterAware); - UNIT_TEST(TestXRobotsTagUnavailableAfterWorks); + UNIT_TEST(TestXRobotsTagMyBot); + UNIT_TEST(TestXRobotsTagOtherBot); + UNIT_TEST(TestXRobotsTagUnavailableAfterAware); + UNIT_TEST(TestXRobotsTagUnavailableAfterWorks); UNIT_TEST(TestXRobotsTagOverridePriority); - UNIT_TEST(TestXRobotsTagDoesNotBreakCharset); - UNIT_TEST(TestXRobotsTagAllowsMultiline); + UNIT_TEST(TestXRobotsTagDoesNotBreakCharset); + UNIT_TEST(TestXRobotsTagAllowsMultiline); UNIT_TEST(TestRelCanonical); UNIT_TEST(TestHreflang); UNIT_TEST(TestHreflangOnLongInput); @@ -50,13 +50,13 @@ public: void TestResponseHeaderOnRequest(); void TestRequestHeaderOnResponse(); void TestXRobotsTagUnknownTags(); - void TestXRobotsTagMyBot(); - void TestXRobotsTagOtherBot(); - void TestXRobotsTagUnavailableAfterAware(); - void TestXRobotsTagUnavailableAfterWorks(); + void TestXRobotsTagMyBot(); + void TestXRobotsTagOtherBot(); + void TestXRobotsTagUnavailableAfterAware(); + void TestXRobotsTagUnavailableAfterWorks(); void TestXRobotsTagOverridePriority(); - void TestXRobotsTagDoesNotBreakCharset(); - void TestXRobotsTagAllowsMultiline(); + void TestXRobotsTagDoesNotBreakCharset(); + void TestXRobotsTagAllowsMultiline(); void TestRelCanonical(); void TestHreflang(); void TestHreflangOnLongInput(); @@ -238,80 +238,80 @@ void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() { const char* headers = "HTTP/1.1 200 OK\r\n" "Content-Type: text/html\r\n" - "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n"; + "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n"; i32 result = httpHeaderParser->Execute(headers, strlen(headers)); UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); - TestFinish(); -} - -void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char* headers = - "HTTP/1.1 200 OK\r\n" - "Content-Type: text/html\r\n" - "x-robots-tag: yandex: noindex, nofollow\r\n" - "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); - TestFinish(); -} - -void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char* headers = - "HTTP/1.1 200 OK\r\n" - "Content-Type: text/html\r\n" - "x-robots-tag: google: noindex, nofollow\r\n" - "x-robots-tag: googlebot: noarchive, noodp\r\n" - "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx"); - TestFinish(); -} - -void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - // проверяем только что unavailable_after ничего не ломает - const char* headers = - "HTTP/1.1 200 OK\r\n" - "Content-Type: text/html\r\n" - "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n" - "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); - TestFinish(); -} - -void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - // пока не поддерживается - const char* headers = - "HTTP/1.1 200 OK\r\n" - "Content-Type: text/html\r\n" - "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1); - //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx"); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); TestFinish(); } +void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: yandex: noindex, nofollow\r\n" + "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: google: noindex, nofollow\r\n" + "x-robots-tag: googlebot: noarchive, noodp\r\n" + "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + // проверяем только что unavailable_after ничего не ломает + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n" + "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x"); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + // пока не поддерживается + const char* headers = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n" + "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1); + //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx"); + TestFinish(); +} + void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() { TestStart(); THttpHeader httpHeader; @@ -327,59 +327,59 @@ void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() { TestFinish(); } -void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char* headers = - "HTTP/1.1 200 OK\r\n" - "X-Robots-Tag: noarchive\r\n" - "Content-Type: application/json; charset=utf-8\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); - UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); - TestFinish(); -} - -void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() { - TestStart(); - THttpHeader httpHeader; - httpHeaderParser->Init(&httpHeader); - const char* headers = - "HTTP/1.1 200 OK\r\n" - "X-Robots-Tag\r\n" - " :\r\n" - " unavailable_since\r\n" - " :\r\n" - " ,\r\n" - " unavailable_since\r\n" - " :\r\n" - " 01 Jan 2000\r\n" - " 00:00 UTC\r\n" - " ,\r\n" - " yandexbot\r\n" - " :\r\n" - " noindex\r\n" - " ,\r\n" - " garbage\r\n" - " ,\r\n" - " nofollow\r\n" - " ,\r\n" - " other\r\n" - " bot\r\n" - " :\r\n" - " noarchive\r\n" - " ,\r\n" - "Content-Type: application/json; charset=utf-8\r\n\r\n"; - i32 result = httpHeaderParser->Execute(headers, strlen(headers)); - UNIT_ASSERT_EQUAL(result, 2); - UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); - UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); - UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); - TestFinish(); -} - +void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "X-Robots-Tag: noarchive\r\n" + "Content-Type: application/json; charset=utf-8\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); + UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); + TestFinish(); +} + +void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() { + TestStart(); + THttpHeader httpHeader; + httpHeaderParser->Init(&httpHeader); + const char* headers = + "HTTP/1.1 200 OK\r\n" + "X-Robots-Tag\r\n" + " :\r\n" + " unavailable_since\r\n" + " :\r\n" + " ,\r\n" + " unavailable_since\r\n" + " :\r\n" + " 01 Jan 2000\r\n" + " 00:00 UTC\r\n" + " ,\r\n" + " yandexbot\r\n" + " :\r\n" + " noindex\r\n" + " ,\r\n" + " garbage\r\n" + " ,\r\n" + " nofollow\r\n" + " ,\r\n" + " other\r\n" + " bot\r\n" + " :\r\n" + " noarchive\r\n" + " ,\r\n" + "Content-Type: application/json; charset=utf-8\r\n\r\n"; + i32 result = httpHeaderParser->Execute(headers, strlen(headers)); + UNIT_ASSERT_EQUAL(result, 2); + UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx"); + UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON)); + UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8)); + TestFinish(); +} + void THttpHeaderParserTestSuite::TestHreflang() { TestStart(); THttpHeader httpHeader; |